From b469ad6c49eb3505a5e8cb9fcaf21186129f84c2 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 27 Jul 2023 23:45:18 +0200 Subject: [PATCH 01/13] nlohmann_json refactor --- CMakeLists.txt | 2 +- benchmark/CMakeLists.txt | 4 +- benchmark/blas/blas.cpp | 21 +- benchmark/blas/blas_common.hpp | 247 +-- benchmark/blas/distributed/multi_vector.cpp | 30 +- .../CMakeLists.txt | 2 +- benchmark/conversion/conversion.cpp | 194 ++ benchmark/conversions/conversions.cpp | 223 -- .../matrix_generator/matrix_generator.cpp | 36 +- .../matrix_statistics/matrix_statistics.cpp | 183 +- benchmark/preconditioner/preconditioner.cpp | 238 +-- benchmark/solver/distributed/solver.cpp | 32 +- benchmark/solver/solver.cpp | 21 +- benchmark/solver/solver_common.hpp | 406 ++-- benchmark/sparse_blas/operations.cpp | 13 +- benchmark/sparse_blas/operations.hpp | 8 +- benchmark/sparse_blas/sparse_blas.cpp | 191 +- benchmark/spmv/distributed/spmv.cpp | 67 +- benchmark/spmv/spmv.cpp | 33 +- benchmark/spmv/spmv_common.hpp | 289 ++- benchmark/test/reference/blas.profile.stderr | 69 +- benchmark/test/reference/blas.simple.stderr | 69 +- .../test/reference/conversion.all.stderr | 1862 +---------------- .../test/reference/conversion.all.stdout | 74 +- .../test/reference/conversion.matrix.stderr | 42 +- .../test/reference/conversion.matrix.stdout | 16 +- .../test/reference/conversion.profile.stderr | 98 +- .../test/reference/conversion.profile.stdout | 19 +- .../test/reference/conversion.simple.stderr | 42 +- .../test/reference/conversion.simple.stdout | 19 +- .../distributed_solver.matrix.stdout | 3 +- .../distributed_solver.profile.stderr | 8 +- .../distributed_solver.profile.stdout | 6 +- .../distributed_solver.simple.stdout | 6 +- .../reference/matrix_statistics.matrix.stderr | 2 +- .../reference/matrix_statistics.matrix.stdout | 4 +- .../reference/matrix_statistics.simple.stderr | 2 +- .../reference/matrix_statistics.simple.stdout | 7 +- .../reference/preconditioner.matrix.stderr | 33 +- 
.../reference/preconditioner.matrix.stdout | 4 +- .../reference/preconditioner.profile.stderr | 29 +- .../reference/preconditioner.profile.stdout | 7 +- .../reference/preconditioner.simple.stderr | 33 +- .../reference/preconditioner.simple.stdout | 7 +- benchmark/test/reference/solver.matrix.stdout | 3 +- .../test/reference/solver.profile.stderr | 8 +- .../test/reference/solver.profile.stdout | 6 +- benchmark/test/reference/solver.simple.stdout | 6 +- .../test/reference/sparse_blas.matrix.stderr | 29 +- .../test/reference/sparse_blas.profile.stderr | 23 +- .../test/reference/sparse_blas.simple.stderr | 30 +- benchmark/test/reference/spmv.matrix.stderr | 21 +- benchmark/test/reference/spmv.matrix.stdout | 5 +- benchmark/test/reference/spmv.profile.stderr | 32 +- benchmark/test/reference/spmv.profile.stdout | 6 +- benchmark/test/reference/spmv.simple.stderr | 21 +- benchmark/test/reference/spmv.simple.stdout | 6 +- benchmark/utils/general.hpp | 342 +-- benchmark/utils/general_matrix.hpp | 18 +- benchmark/utils/generator.hpp | 118 +- benchmark/utils/iteration_control.hpp | 326 +++ benchmark/utils/json.hpp | 63 +- benchmark/utils/loggers.hpp | 100 +- benchmark/utils/runner.hpp | 209 ++ benchmark/utils/spmv_validation.hpp | 83 - third_party/CMakeLists.txt | 4 +- third_party/nlohmann_json/CMakeLists.txt | 9 + third_party/rapidjson/CMakeLists.txt | 14 - 68 files changed, 1864 insertions(+), 4319 deletions(-) rename benchmark/{conversions => conversion}/CMakeLists.txt (88%) create mode 100644 benchmark/conversion/conversion.cpp delete mode 100644 benchmark/conversions/conversions.cpp create mode 100644 benchmark/utils/iteration_control.hpp create mode 100644 benchmark/utils/runner.hpp delete mode 100644 benchmark/utils/spmv_validation.hpp create mode 100644 third_party/nlohmann_json/CMakeLists.txt delete mode 100644 third_party/rapidjson/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c9f22b4db1..a483f09a0d3 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -256,7 +256,7 @@ if(GINKGO_BUILD_TESTS) endif() if(GINKGO_BUILD_BENCHMARKS) find_package(gflags 2.2.2 QUIET) - find_package(RapidJSON 1.1.0 QUIET) + find_package(nlohmann_json 3.9.1 QUIET) endif() # System provided, third party libraries (not bundled!) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 44a0a3d1d9e..e993ee6cf0c 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -57,7 +57,7 @@ endfunction() # All remaining arguments will be treated as source files function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def type) add_executable("${name}" ${ARGN}) - target_link_libraries("${name}" ginkgo gflags rapidjson) + target_link_libraries("${name}" ginkgo gflags nlohmann_json::nlohmann_json) # always include the device timer if (GINKGO_BUILD_CUDA) target_compile_definitions("${name}" PRIVATE HAS_CUDA_TIMER=1) @@ -149,7 +149,7 @@ if (GINKGO_BUILD_MPI) endif() add_subdirectory(blas) -add_subdirectory(conversions) +add_subdirectory(conversion) add_subdirectory(matrix_generator) add_subdirectory(matrix_statistics) add_subdirectory(preconditioner) diff --git a/benchmark/blas/blas.cpp b/benchmark/blas/blas.cpp index 11228ed5818..f7ad8120a80 100644 --- a/benchmark/blas/blas.cpp +++ b/benchmark/blas/blas.cpp @@ -130,26 +130,17 @@ Parameters for a benchmark case are: stride_B: stride for B matrix in gemm (optional, default m) stride_C: stride for C matrix in gemm (optional, default m) )"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing(&argc, &argv, header, format); - std::string extra_information = - "The operations are " + FLAGS_operations + "\n"; + std::string extra_information = "The operations are " + FLAGS_operations; print_general_information(extra_information); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; 
- test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - std::cerr - << "Input has to be a JSON array of benchmark configurations:\n" - << format; - std::exit(1); - } + auto test_cases = json::parse(get_input_stream()); - run_blas_benchmarks(exec, get_timer(exec, FLAGS_gpu_timer), operation_map, - test_cases, true); + run_test_cases(BlasBenchmark{operation_map}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/blas/blas_common.hpp b/benchmark/blas/blas_common.hpp index fe0110f82fb..88819a043b0 100644 --- a/benchmark/blas/blas_common.hpp +++ b/benchmark/blas/blas_common.hpp @@ -43,7 +43,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/general.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" #include "core/components/prefix_sum_kernels.hpp" @@ -70,14 +72,6 @@ DEFINE_string( "C has dimensions n x m and x and y have dimensions n x r"); -std::string example_config = R"( - [ - { "n": 100 }, - { "n": 200, "m": 200, "k": 200 } - ] -)"; - - class BenchmarkOperation { public: virtual ~BenchmarkOperation() = default; @@ -404,70 +398,101 @@ struct dimensions { }; -dimensions parse_dims(rapidjson::Value& test_case) -{ - auto get_optional = [](rapidjson::Value& obj, const char* name, - gko::size_type default_value) -> gko::size_type { - if (obj.HasMember(name)) { - return obj[name].GetUint64(); - } else { - return default_value; - } - }; - - dimensions result; - result.n = test_case["n"].GetInt64(); - result.k = get_optional(test_case, "k", result.n); - result.m = get_optional(test_case, "m", result.n); - result.r = get_optional(test_case, "r", 1); - if (test_case.HasMember("stride")) { - result.stride_x = test_case["stride"].GetInt64(); - 
result.stride_y = result.stride_x; - } else { - result.stride_x = get_optional(test_case, "stride_x", result.r); - result.stride_y = get_optional(test_case, "stride_y", result.r); +struct BlasBenchmark : Benchmark { + using map_type = + std::map( + std::shared_ptr, dimensions)>>; + map_type operation_map; + std::vector operations; + std::string name; + bool do_print; + + BlasBenchmark(map_type operation_map, bool do_print = true) + : operation_map{std::move(operation_map)}, + name{"blas"}, + operations{split(FLAGS_operations)}, + do_print{do_print} + {} + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return operations; } - result.stride_A = get_optional(test_case, "stride_A", result.k); - result.stride_B = get_optional(test_case, "stride_B", result.m); - result.stride_C = get_optional(test_case, "stride_C", result.m); - return result; -} + bool should_print() const override { return do_print; } -std::string describe(rapidjson::Value& test_case) -{ - std::stringstream ss; - auto optional_output = [&](const char* name) { - if (test_case.HasMember(name) && test_case[name].IsInt64()) { - ss << name << " = " << test_case[name].GetInt64() << " "; - } - }; - optional_output("n"); - optional_output("k"); - optional_output("m"); - optional_output("r"); - optional_output("stride"); - optional_output("stride_x"); - optional_output("stride_y"); - optional_output("stride_A"); - optional_output("stride_B"); - optional_output("stride_C"); - return ss.str(); -} + std::string get_example_config() const override + { + return json::parse(R"([{"n": 100}, {"n": 200, "m": 200, "k": 200}])") + .dump(4); + } + bool validate_config(const json& value) const override + { + return value.contains("n") && value["n"].is_number_integer(); + } -template -void apply_blas(const char* operation_name, std::shared_ptr exec, - std::shared_ptr timer, const OpMap& operation_map, - rapidjson::Value& test_case, - 
rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& blas_case = test_case["blas"]; - add_or_set_member(blas_case, operation_name, - rapidjson::Value(rapidjson::kObjectType), allocator); + std::string describe_config(const json& test_case) const override + { + std::stringstream ss; + auto optional_output = [&](const char* name) { + if (test_case.contains(name) && + test_case[name].is_number_integer()) { + ss << name << " = " << test_case[name].get() << " "; + } + }; + optional_output("n"); + optional_output("k"); + optional_output("m"); + optional_output("r"); + optional_output("stride"); + optional_output("stride_x"); + optional_output("stride_y"); + optional_output("stride_A"); + optional_output("stride_B"); + optional_output("stride_C"); + return ss.str(); + } + + dimensions setup(std::shared_ptr exec, + json& test_case) const override + { + auto get_optional = [](json& obj, const char* name, + gko::size_type default_value) -> gko::size_type { + if (obj.contains(name)) { + return obj[name].get(); + } else { + return default_value; + } + }; + + dimensions result; + result.n = test_case["n"].get(); + result.k = get_optional(test_case, "k", result.n); + result.m = get_optional(test_case, "m", result.n); + result.r = get_optional(test_case, "r", 1); + if (test_case.contains("stride")) { + result.stride_x = test_case["stride"].get(); + result.stride_y = result.stride_x; + } else { + result.stride_x = get_optional(test_case, "stride_x", result.r); + result.stride_y = get_optional(test_case, "stride_y", result.r); + } + result.stride_A = get_optional(test_case, "stride_A", result.k); + result.stride_B = get_optional(test_case, "stride_B", result.m); + result.stride_C = get_optional(test_case, "stride_C", result.m); + return result; + } - auto op = operation_map.at(operation_name)(exec, parse_dims(test_case)); + + void run(std::shared_ptr exec, std::shared_ptr timer, + dimensions& dims, const std::string& operation_name, + json& operation_case) const override 
+ { + auto op = operation_map.at(operation_name)(exec, dims); IterationControl ic(timer); @@ -488,89 +513,9 @@ void apply_blas(const char* operation_name, std::shared_ptr exec, const auto flops = static_cast(op->get_flops()); const auto mem = static_cast(op->get_memory()); const auto repetitions = ic.get_num_repetitions(); - add_or_set_member(blas_case[operation_name], "time", runtime, - allocator); - add_or_set_member(blas_case[operation_name], "flops", flops / runtime, - allocator); - add_or_set_member(blas_case[operation_name], "bandwidth", mem / runtime, - allocator); - add_or_set_member(blas_case[operation_name], "repetitions", repetitions, - allocator); - - // compute and write benchmark data - add_or_set_member(blas_case[operation_name], "completed", true, - allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["blas"][operation_name], "completed", false, - allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["blas"][operation_name], "error", - msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -template -void run_blas_benchmarks(std::shared_ptr exec, - std::shared_ptr timer, - const OpMap& operation_map, - rapidjson::Document& test_cases, bool do_print) -{ - auto operations = split(FLAGS_operations, ','); - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); + operation_case["time"] = runtime; + operation_case["flops"] = flops / runtime; + operation_case["bandwidth"] = mem / runtime; + operation_case["repetitions"] = repetitions; } - auto annotate = annotate_functor{profiler_hook}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - if (!test_case.HasMember("blas")) { - test_case.AddMember("blas", - 
rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& blas_case = test_case["blas"]; - if (!FLAGS_overwrite && - all_of(begin(operations), end(operations), - [&blas_case](const std::string& s) { - return blas_case.HasMember(s.c_str()); - })) { - continue; - } - if (do_print) { - std::clog << "Running test case\n" << test_case << std::endl; - } - // annotate the test case - auto test_case_range = annotate(describe(test_case)); - for (const auto& operation_name : operations) { - { - auto operation_range = annotate(operation_name.c_str()); - apply_blas(operation_name.c_str(), exec, timer, - operation_map, test_case, allocator); - } - - if (do_print) { - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - - backup_results(test_cases); - } - } - } catch (const std::exception& e) { - std::cerr << "Error setting up benchmark, what(): " << e.what() - << std::endl; - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } -} +}; diff --git a/benchmark/blas/distributed/multi_vector.cpp b/benchmark/blas/distributed/multi_vector.cpp index be326b08b96..d95e5fb38ac 100644 --- a/benchmark/blas/distributed/multi_vector.cpp +++ b/benchmark/blas/distributed/multi_vector.cpp @@ -50,6 +50,10 @@ int main(int argc, char* argv[]) { gko::experimental::mpi::environment mpi_env{argc, argv}; + const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); + const auto rank = comm.rank(); + const auto do_print = rank == 0; + std::string header = R"(" A benchmark for measuring performance of Ginkgo's BLAS-like " operations. 
@@ -60,13 +64,10 @@ Parameters for a benchmark case are: stride_x: stride for input vector x (optional, default r) stride_y: stride for in/out vector y (optional, default r) )"; - std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); + std::string format = Generator::get_example_config(); + initialize_argument_parsing(&argc, &argv, header, format, do_print); - const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); - const auto rank = comm.rank(); - - if (rank == 0) { + if (do_print) { std::string extra_information = "The operations are " + FLAGS_operations; print_general_information(extra_information); @@ -75,14 +76,7 @@ Parameters for a benchmark case are: auto exec = executor_factory_mpi.at(FLAGS_executor)(comm.get()); std::string json_input = broadcast_json_input(get_input_stream(), comm); - rapidjson::Document test_cases; - test_cases.Parse(json_input.c_str()); - if (!test_cases.IsArray()) { - std::cerr - << "Input has to be a JSON array of benchmark configurations:\n" - << format; - std::exit(1); - } + auto test_cases = json::parse(json_input); std::map( @@ -130,10 +124,10 @@ Parameters for a benchmark case are: exec, Generator{comm, {}}, dims.n, dims.r, dims.stride_y); }}}; - run_blas_benchmarks(exec, get_mpi_timer(exec, comm, FLAGS_gpu_timer), - operation_map, test_cases, rank == 0); + run_test_cases(BlasBenchmark{operation_map, do_print}, exec, + get_mpi_timer(exec, comm, FLAGS_gpu_timer), test_cases); - if (rank == 0) { - std::cout << test_cases << std::endl; + if (do_print) { + std::cout << std::setw(4) << test_cases << std::endl; } } diff --git a/benchmark/conversions/CMakeLists.txt b/benchmark/conversion/CMakeLists.txt similarity index 88% rename from benchmark/conversions/CMakeLists.txt rename to benchmark/conversion/CMakeLists.txt index 21dd363d3c0..7ecf578c055 100644 --- a/benchmark/conversions/CMakeLists.txt +++ b/benchmark/conversion/CMakeLists.txt @@ -1 +1 @@ 
-ginkgo_add_typed_benchmark_executables(conversion "NO" conversions.cpp) +ginkgo_add_typed_benchmark_executables(conversion "NO" conversion.cpp) diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp new file mode 100644 index 00000000000..b9a5d5c46d6 --- /dev/null +++ b/benchmark/conversion/conversion.cpp @@ -0,0 +1,194 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "benchmark/utils/formats.hpp" +#include "benchmark/utils/general_matrix.hpp" +#include "benchmark/utils/generator.hpp" +#include "benchmark/utils/iteration_control.hpp" +#include "benchmark/utils/runner.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" + + +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING + + +using Generator = DefaultSystemGenerator<>; + + +struct ConversionBenchmark : Benchmark> { + std::string name; + std::vector operations; + + ConversionBenchmark() : name{"conversion"} + { + auto ref_exec = gko::ReferenceExecutor::create(); + auto formats = split(FLAGS_formats); + for (const auto& from_format : formats) { + operations.push_back(from_format + "-read"); + auto from_mtx = + formats::matrix_type_factory.at(from_format)(ref_exec); + // all pairs of conversions that are supported by Ginkgo + for (const auto& to_format : formats) { + if (from_format == to_format) { + continue; + } + auto to_mtx = + formats::matrix_type_factory.at(to_format)(ref_exec); + try { + to_mtx->copy_from(from_mtx); + operations.push_back(from_format + "-" + to_format); + } catch (const std::exception& e) { + } + } + } + } + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return operations; + } + + bool should_print() const override { return true; } + + std::string get_example_config() const override + { + return Generator::get_example_config(); + } + + bool validate_config(const json& test_case) const override + { + return Generator::validate_config(test_case); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } + + gko::matrix_data 
setup(std::shared_ptr exec, + json& test_case) const override + { + gko::matrix_data data; + data = Generator::generate_matrix_data(test_case); + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + return data; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + gko::matrix_data& data, + const std::string& operation_name, + json& operation_case) const override + { + auto split_it = + std::find(operation_name.begin(), operation_name.end(), '-'); + std::string from_name{operation_name.begin(), split_it}; + std::string to_name{split_it + 1, operation_name.end()}; + auto mtx_from = formats::matrix_type_factory.at(from_name)(exec); + auto readable = + gko::as>(mtx_from.get()); + IterationControl ic{timer}; + if (to_name == "read") { + // warm run + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + readable->read(data); + exec->synchronize(); + } + // timed run + for (auto _ : ic.run()) { + readable->read(data); + } + } else { + readable->read(data); + auto mtx_to = formats::matrix_type_factory.at(to_name)(exec); + + // warm run + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + mtx_to->copy_from(mtx_from); + exec->synchronize(); + } + // timed run + for (auto _ : ic.run()) { + mtx_to->copy_from(mtx_from); + } + } + operation_case["time"] = ic.compute_time(FLAGS_timer_method); + operation_case["repetitions"] = ic.get_num_repetitions(); + } +}; + + +int main(int argc, char* argv[]) +{ + std::string header = + "A benchmark for measuring performance of Ginkgo's conversions.\n"; + std::string format_str = Generator::get_example_config(); + initialize_argument_parsing_matrix(&argc, &argv, header, format_str); + + std::string extra_information = + std::string() + "The formats are " + FLAGS_formats; + print_general_information(extra_information); + + auto 
exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); + auto formats = split(FLAGS_formats, ','); + + auto test_cases = json::parse(get_input_stream()); + + run_test_cases(ConversionBenchmark{}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); + + std::cout << std::setw(4) << test_cases << std::endl; +} diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp deleted file mode 100644 index d9684321e2d..00000000000 --- a/benchmark/conversions/conversions.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "benchmark/utils/formats.hpp" -#include "benchmark/utils/general_matrix.hpp" -#include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" -#include "benchmark/utils/timer.hpp" -#include "benchmark/utils/types.hpp" - - -#ifdef GINKGO_BENCHMARK_ENABLE_TUNING -#include "benchmark/utils/tuning_variables.hpp" -#endif // GINKGO_BENCHMARK_ENABLE_TUNING - - -// This function supposes that management of `FLAGS_overwrite` is done before -// calling it -void convert_matrix(const gko::LinOp* matrix_from, const char* format_to, - const char* conversion_name, - std::shared_ptr exec, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& conversion_case = test_case["conversions"]; - add_or_set_member(conversion_case, conversion_name, - rapidjson::Value(rapidjson::kObjectType), allocator); - - gko::matrix_data data{gko::dim<2>{1, 1}, 1}; - auto matrix_to = share(formats::matrix_factory(format_to, exec, data)); - - auto timer = get_timer(exec, FLAGS_gpu_timer); - IterationControl ic{timer}; - - // warm run - for (auto _ : ic.warmup_run()) { - exec->synchronize(); - matrix_to->copy_from(matrix_from); - exec->synchronize(); - matrix_to->clear(); - } - // timed run - for (auto _ : ic.run()) { - matrix_to->copy_from(matrix_from); - } 
- add_or_set_member(conversion_case[conversion_name], "time", - ic.compute_time(FLAGS_timer_method), allocator); - add_or_set_member(conversion_case[conversion_name], "repetitions", - ic.get_num_repetitions(), allocator); - - // compute and write benchmark data - add_or_set_member(conversion_case[conversion_name], "completed", true, - allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["conversions"][conversion_name], - "completed", false, allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["conversions"][conversion_name], - "error", msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -int main(int argc, char* argv[]) -{ - std::string header = - "A benchmark for measuring performance of Ginkgo's conversions.\n"; - std::string format_str = example_config; - initialize_argument_parsing_matrix(&argc, &argv, header, format_str); - - std::string extra_information = - std::string() + "The formats are " + FLAGS_formats + "\n"; - print_general_information(extra_information); - - auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - auto formats = split(FLAGS_formats, ','); - - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } - - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - - DefaultSystemGenerator<> generator{}; - - for (auto& test_case : test_cases.GetArray()) { - std::clog << "Benchmarking conversions. 
" << std::endl; - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("conversions")) { - test_case.AddMember("conversions", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& conversion_case = test_case["conversions"]; - - std::clog << "Running test case\n" << test_case << std::endl; - gko::matrix_data data; - try { - data = generator.generate_matrix_data(test_case); - } catch (std::exception& e) { - std::cerr << "Error setting up matrix data, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); - } - continue; - } - std::clog << "Matrix is of size (" << data.size[0] << ", " - << data.size[1] << ")" << std::endl; - add_or_set_member(test_case, "size", data.size[0], allocator); - // annotate the test case - auto test_case_range = annotate(generator.describe_config(test_case)); - for (const auto& format_from : formats) { - try { - auto matrix_from = - share(formats::matrix_factory(format_from, exec, data)); - for (const auto& format_to : formats) { - if (format_from == format_to) { - continue; - } - auto conversion_name = - std::string(format_from) + "-" + format_to; - - if (!FLAGS_overwrite && - conversion_case.HasMember(conversion_name.c_str())) { - continue; - } - { - auto conversion_range = - annotate(conversion_name.c_str()); - convert_matrix(matrix_from.get(), format_to.c_str(), - conversion_name.c_str(), exec, test_case, - allocator); - } - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - } - backup_results(test_cases); - } catch (const gko::AllocationError& e) { - for (const auto& format : formats::matrix_type_factory) { - const auto format_to = std::get<0>(format); - auto conversion_name = - std::string(format_from) + "-" + format_to; - add_or_set_member( - test_case["conversions"][conversion_name.c_str()], - "completed", false, 
allocator); - } - std::cerr << "Error when allocating data for type " - << format_from << ". what(): " << e.what() - << std::endl; - backup_results(test_cases); - } catch (const std::exception& e) { - std::cerr << "Error when running benchmark, what(): " - << e.what() << std::endl; - } - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } - - std::cout << test_cases << std::endl; -} diff --git a/benchmark/matrix_generator/matrix_generator.cpp b/benchmark/matrix_generator/matrix_generator.cpp index 138b5a9c2ce..193d95f897f 100644 --- a/benchmark/matrix_generator/matrix_generator.cpp +++ b/benchmark/matrix_generator/matrix_generator.cpp @@ -85,31 +85,33 @@ std::string input_format = // clang-format on -void validate_option_object(const rapidjson::Value& value) +void validate_option_object(const json& value) { - if (!value.IsObject() || !value.HasMember("filename") || - !value["filename"].IsString() || !value.HasMember("problem") || - !value["problem"].IsObject() || !value["problem"].HasMember("type") || - !value["problem"]["type"].IsString()) { + if (!value.is_object() || !value.contains("filename") || + !value["filename"].is_string() || !value.contains("problem") || + !value["problem"].is_object() || !value["problem"].contains("type") || + !value["problem"]["type"].is_string()) { print_config_error_and_exit(2); } } using generator_function = std::function( - rapidjson::Value&, std::default_random_engine&)>; + json&, std::default_random_engine&)>; // matrix generators gko::matrix_data generate_block_diagonal( - rapidjson::Value& config, std::default_random_engine& engine) + json& config, std::default_random_engine& engine) { - if (!config.HasMember("num_blocks") || !config["num_blocks"].IsUint() || - !config.HasMember("block_size") || !config["block_size"].IsUint()) { + if (!config.contains("num_blocks") || + !config["num_blocks"].is_number_unsigned() || + !config.contains("block_size") || + !config["block_size"].is_number_unsigned()) { 
print_config_error_and_exit(2); } - auto num_blocks = config["num_blocks"].GetUint(); - auto block_size = config["block_size"].GetUint(); + auto num_blocks = config["num_blocks"].get(); + auto block_size = config["block_size"].get(); auto block = gko::matrix_data( gko::dim<2>(block_size), std::uniform_real_distribution(-1.0, 1.0), engine); @@ -132,20 +134,18 @@ int main(int argc, char* argv[]) std::clog << gko::version_info::get() << std::endl; auto engine = get_engine(); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document configurations; - configurations.ParseStream(jcin); + auto configurations = json::parse(get_input_stream()); - if (!configurations.IsArray()) { + if (!configurations.is_array()) { print_config_error_and_exit(1); } - for (auto& config : configurations.GetArray()) { + for (auto& config : configurations) { try { validate_option_object(config); std::clog << "Generating matrix: " << config << std::endl; - auto filename = config["filename"].GetString(); - auto type = config["problem"]["type"].GetString(); + auto filename = config["filename"].get(); + auto type = config["problem"]["type"].get(); auto mdata = generator[type](config["problem"], engine); std::ofstream ofs(filename); gko::write_raw(ofs, mdata, gko::layout_type::coordinate); diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index fccf4391ad5..40c505c7627 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -38,9 +38,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/types.hpp" @@ -51,9 +54,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// See en.wikipedia.org/wiki/Five-number_summary // Quartile computation uses Method 3 from en.wikipedia.org/wiki/Quartile -void compute_summary(const std::vector& dist, - rapidjson::Value& out, - rapidjson::MemoryPoolAllocator<>& allocator) +void compute_summary(const std::vector& dist, json& out) { const auto q = dist.size() / 4; const auto r = dist.size() % 4; @@ -72,23 +73,14 @@ void compute_summary(const std::vector& dist, }; // clang-format on - add_or_set_member(out, "min", dist[0], allocator); - add_or_set_member( - out, "q1", - coefs[r][0] * static_cast(dist[positions[r][0]]) + - coefs[r][1] * static_cast(dist[positions[r][1]]), - allocator); - add_or_set_member( - out, "median", - coefs[r][2] * static_cast(dist[positions[r][2]]) + - coefs[r][3] * static_cast(dist[positions[r][3]]), - allocator); - add_or_set_member( - out, "q3", - coefs[r][4] * static_cast(dist[positions[r][4]]) + - coefs[r][5] * static_cast(dist[positions[r][5]]), - allocator); - add_or_set_member(out, "max", dist[dist.size() - 1], allocator); + out["min"] = dist.front(); + out["q1"] = coefs[r][0] * static_cast(dist[positions[r][0]]) + + coefs[r][1] * static_cast(dist[positions[r][1]]); + out["median"] = coefs[r][2] * static_cast(dist[positions[r][2]]) + + coefs[r][3] * static_cast(dist[positions[r][3]]); + out["q3"] = coefs[r][4] * static_cast(dist[positions[r][4]]) + + coefs[r][5] * static_cast(dist[positions[r][5]]); + out["max"] = dist.back(); } @@ -108,39 +100,30 @@ double compute_moment(int degree, const std::vector& dist, // See en.wikipedia.org/wiki/Moment_(mathematics) -void compute_moments(const std::vector& dist, - rapidjson::Value& out, - rapidjson::MemoryPoolAllocator<>& allocator) +void compute_moments(const std::vector& dist, json& out) { const auto mean = compute_moment(1, dist); - add_or_set_member(out, "mean", mean, allocator); + out["mean"] = mean; const auto variance = compute_moment(2, dist, mean); - add_or_set_member(out, "variance", variance, allocator); + 
out["variance"] = variance; const auto dev = std::sqrt(variance); - add_or_set_member(out, "skewness", compute_moment(3, dist, mean, dev), - allocator); - add_or_set_member(out, "kurtosis", compute_moment(4, dist, mean, dev), - allocator); - add_or_set_member(out, "hyperskewness", compute_moment(5, dist, mean, dev), - allocator); - add_or_set_member(out, "hyperflatness", compute_moment(6, dist, mean, dev), - allocator); + out["skewness"] = compute_moment(3, dist, mean, dev); + out["kurtosis"] = compute_moment(4, dist, mean, dev); + out["hyperskewness"] = compute_moment(5, dist, mean, dev); + out["hyperflatness"] = compute_moment(6, dist, mean, dev); } -template void compute_distribution_properties(const std::vector& dist, - rapidjson::Value& out, - Allocator& allocator) + json& out) { - compute_summary(dist, out, allocator); - compute_moments(dist, out, allocator); + compute_summary(dist, out); + compute_moments(dist, out); } -template void extract_matrix_statistics(gko::matrix_data& data, - rapidjson::Value& problem, Allocator& allocator) + json& problem) { std::vector row_dist(data.size[0]); std::vector col_dist(data.size[1]); @@ -149,72 +132,90 @@ void extract_matrix_statistics(gko::matrix_data& data, ++col_dist[v.column]; } - add_or_set_member(problem, "rows", data.size[0], allocator); - add_or_set_member(problem, "columns", data.size[1], allocator); - add_or_set_member(problem, "nonzeros", data.nonzeros.size(), allocator); + problem["rows"] = data.size[0]; + problem["columns"] = data.size[1]; + problem["nonzeros"] = data.nonzeros.size(); std::sort(begin(row_dist), end(row_dist)); - add_or_set_member(problem, "row_distribution", - rapidjson::Value(rapidjson::kObjectType), allocator); - compute_distribution_properties(row_dist, problem["row_distribution"], - allocator); + problem["row_distribution"] = json::object(); + compute_distribution_properties(row_dist, problem["row_distribution"]); std::sort(begin(col_dist), end(col_dist)); - add_or_set_member(problem, 
"col_distribution", - rapidjson::Value(rapidjson::kObjectType), allocator); - compute_distribution_properties(col_dist, problem["col_distribution"], - allocator); + problem["col_distribution"] = json::object(); + compute_distribution_properties(col_dist, problem["col_distribution"]); } -int main(int argc, char* argv[]) -{ - std::string header = - "A utility that collects additional statistical properties of the " - "matrix.\n"; - std::string format = example_config; - initialize_argument_parsing_matrix(&argc, &argv, header, format); +using Generator = DefaultSystemGenerator; - std::clog << gko::version_info::get() << std::endl; - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } +struct MatrixStatistics : Benchmark { + std::string name; + std::vector empty; - auto& allocator = test_cases.GetAllocator(); + MatrixStatistics() : name{"problem"} {} - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("problem")) { - test_case.AddMember("problem", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& problem = test_case["problem"]; + const std::string& get_name() const override { return name; } - std::clog << "Running test case\n" << test_case << std::endl; + const std::vector& get_operations() const override + { + return empty; + } - auto matrix = - DefaultSystemGenerator::generate_matrix_data( - test_case); + bool should_print() const override { return true; } - std::clog << "Matrix is of size (" << matrix.size[0] << ", " - << matrix.size[1] << ")" << std::endl; - add_or_set_member(test_case, "size", matrix.size[0], allocator); + std::string get_example_config() const override + { + return Generator::get_example_config(); + } - extract_matrix_statistics(matrix, test_case["problem"], allocator); + bool validate_config(const json& 
test_case) const override + { + return Generator::validate_config(test_case); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } - backup_results(test_cases); - } catch (const std::exception& e) { - std::cerr << "Error extracting statistics, what(): " << e.what() - << std::endl; - } + int setup(std::shared_ptr exec, + json& test_case) const override + { + auto data = Generator::generate_matrix_data(test_case); + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + + extract_matrix_statistics(data, test_case["problem"]); + return 0; } - std::cout << test_cases << std::endl; + + void run(std::shared_ptr exec, std::shared_ptr timer, + int& data, const std::string& operation_name, + json& operation_case) const override + {} +}; + + +int main(int argc, char* argv[]) +{ + std::string header = + "A utility that collects additional statistical properties of the " + "matrix.\n"; + std::string format = Generator::get_example_config(); + initialize_argument_parsing_matrix(&argc, &argv, header, format); + + std::clog << gko::version_info::get() << std::endl; + + auto test_cases = json::parse(get_input_stream()); + auto exec = gko::ReferenceExecutor::create(); + + run_test_cases(MatrixStatistics{}, exec, get_timer(exec, false), + test_cases); + + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index e7859e992dc..7c130328d34 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -43,9 +43,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/formats.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/preconditioners.hpp" -#include "benchmark/utils/spmv_validation.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" @@ -128,34 +129,85 @@ std::string encode_parameters(const char* precond_name) } -void run_preconditioner(const char* precond_name, - std::shared_ptr exec, - std::shared_ptr system_matrix, - const vec* b, const vec* x, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& precond_object = test_case["preconditioner"]; - auto encoded_name = encode_parameters(precond_name); +struct preconditioner_benchmark_state { + std::unique_ptr x; + std::unique_ptr b; + std::shared_ptr system_matrix; +}; + + +using Generator = DefaultSystemGenerator<>; + - if (!FLAGS_overwrite && - precond_object.HasMember(encoded_name.c_str())) { - return; +struct PreconditionerBenchmark : Benchmark { + std::string name; + std::vector preconditioners; + std::map precond_decoder; + + PreconditionerBenchmark() + : name{"preconditioner"} /* operations are appended below in encoded form only, so each one has a precond_decoder key */ + { + for (auto precond : split(FLAGS_preconditioners)) { + preconditioners.push_back(encode_parameters(precond.c_str())); + precond_decoder[preconditioners.back()] = precond; + } } + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return preconditioners; + } + + bool should_print() const override { return true; } + + bool validate_config(const json& value) const override + { + return Generator::validate_config(value); + } - add_or_set_member(precond_object, encoded_name.c_str(), - rapidjson::Value(rapidjson::kObjectType), allocator); - auto& this_precond_data =
precond_object[encoded_name.c_str()]; + std::string get_example_config() const override + { + return Generator::get_example_config(); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } - add_or_set_member(this_precond_data, "generate", - rapidjson::Value(rapidjson::kObjectType), allocator); - add_or_set_member(this_precond_data, "apply", - rapidjson::Value(rapidjson::kObjectType), allocator); + preconditioner_benchmark_state setup(std::shared_ptr exec, + json& test_case) const override + { + preconditioner_benchmark_state state; + auto data = Generator::generate_matrix_data(test_case); + + state.system_matrix = + formats::matrix_factory(FLAGS_formats, exec, data); + state.b = Generator::create_multi_vector_random(exec, data.size[0]); + state.x = Generator::create_multi_vector(exec, data.size[0], + gko::zero()); + + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + return state; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + preconditioner_benchmark_state& state, + const std::string& encoded_precond_name, + json& precond_case) const override + { + auto decoded_precond_name = precond_decoder.at(encoded_precond_name); + precond_case["generate"] = json::object(); + precond_case["apply"] = json::object(); for (auto stage : {"generate", "apply"}) { - add_or_set_member(this_precond_data[stage], "components", - rapidjson::Value(rapidjson::kObjectType), - allocator); + precond_case[stage]["components"] = json::object(); } IterationControl ic_gen{get_timer(exec, FLAGS_gpu_timer)}; @@ -163,54 +215,51 @@ void run_preconditioner(const char* precond_name, { // fast run, gets total time - auto x_clone = clone(x); - - auto precond = precond_factory.at(precond_name)(exec); + auto x_clone = 
clone(state.x); + auto precond = precond_factory.at(decoded_precond_name)(exec); for (auto _ : ic_apply.warmup_run()) { - precond->generate(system_matrix)->apply(b, x_clone); + precond->generate(state.system_matrix)->apply(state.b, x_clone); } std::unique_ptr precond_op; for (auto _ : ic_gen.run()) { - precond_op = precond->generate(system_matrix); + precond_op = precond->generate(state.system_matrix); } - add_or_set_member(this_precond_data["generate"], "time", - ic_gen.compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(this_precond_data["generate"], "repetitions", - ic_gen.get_num_repetitions(), allocator); + precond_case["generate"]["time"] = + ic_gen.compute_time(FLAGS_timer_method); + precond_case["generate"]["repetitions"] = + ic_gen.get_num_repetitions(); for (auto _ : ic_apply.run()) { - precond_op->apply(b, x_clone); + precond_op->apply(state.b, x_clone); } - add_or_set_member(this_precond_data["apply"], "time", - ic_apply.compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(this_precond_data["apply"], "repetitions", - ic_apply.get_num_repetitions(), allocator); + precond_case["apply"]["time"] = + ic_apply.compute_time(FLAGS_timer_method); + precond_case["apply"]["repetitions"] = + ic_apply.get_num_repetitions(); } if (FLAGS_detailed) { // slow run, times each component separately - auto x_clone = clone(x); - auto precond = precond_factory.at(precond_name)(exec); + auto x_clone = clone(state.x); + auto precond = precond_factory.at(decoded_precond_name)(exec); std::unique_ptr precond_op; { auto gen_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - this_precond_data["generate"]["components"], allocator, + precond_case["generate"]["components"], ic_gen.get_num_repetitions()); exec->add_logger(gen_logger); if (exec->get_master() != exec) { exec->get_master()->add_logger(gen_logger); } for (auto i = 0u; i < ic_gen.get_num_repetitions(); ++i) { - precond_op = precond->generate(system_matrix); + 
precond_op = precond->generate(state.system_matrix); } if (exec->get_master() != exec) { exec->get_master()->remove_logger(gen_logger); @@ -220,39 +269,22 @@ void run_preconditioner(const char* precond_name, auto apply_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - this_precond_data["apply"]["components"], allocator, + precond_case["apply"]["components"], ic_apply.get_num_repetitions()); exec->add_logger(apply_logger); if (exec->get_master() != exec) { exec->get_master()->add_logger(apply_logger); } for (auto i = 0u; i < ic_apply.get_num_repetitions(); ++i) { - precond_op->apply(b, x_clone); + precond_op->apply(state.b, x_clone); } if (exec->get_master() != exec) { exec->get_master()->remove_logger(apply_logger); } exec->remove_logger(apply_logger); } - - add_or_set_member(this_precond_data, "completed", true, allocator); - } catch (const std::exception& e) { - auto encoded_name = encode_parameters(precond_name); - add_or_set_member(test_case["preconditioner"], encoded_name.c_str(), - rapidjson::Value(rapidjson::kObjectType), allocator); - add_or_set_member(test_case["preconditioner"][encoded_name.c_str()], - "completed", false, allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["preconditioner"][encoded_name.c_str()], - "error", msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; } -} +}; int main(int argc, char* argv[]) @@ -261,11 +293,11 @@ int main(int argc, char* argv[]) FLAGS_formats = "csr"; std::string header = "A benchmark for measuring preconditioner performance.\n"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing_matrix(&argc, &argv, header, format); std::string extra_information = - "Running with preconditioners: " + FLAGS_preconditioners + "\n"; + "Running with 
preconditioners: " + FLAGS_preconditioners; print_general_information(extra_information); auto exec = get_executor(FLAGS_gpu_timer); @@ -279,76 +311,10 @@ int main(int argc, char* argv[]) std::exit(1); } - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(get_input_stream()); - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - DefaultSystemGenerator<> generator{}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("preconditioner")) { - test_case.AddMember("preconditioner", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& precond_object = test_case["preconditioner"]; - if (!FLAGS_overwrite && - all_of(begin(preconditioners), end(preconditioners), - [&precond_object](const std::string& s) { - return precond_object.HasMember(s.c_str()); - })) { - continue; - } - std::clog << "Running test case\n" << test_case << std::endl; - - // annotate the test case - auto test_case_range = - annotate(generator.describe_config(test_case)); - - auto data = generator.generate_matrix_data(test_case); - - auto system_matrix = - share(formats::matrix_factory(FLAGS_formats, exec, data)); - auto b = generator.create_multi_vector_random( - exec, system_matrix->get_size()[0]); - auto x = generator.create_multi_vector( - exec, system_matrix->get_size()[0], gko::zero()); - - std::clog << "Matrix is of size (" << system_matrix->get_size()[0] - << ", " << system_matrix->get_size()[1] << ")" - << std::endl; - add_or_set_member(test_case, "size", data.size[0], allocator); - for (const auto& precond_name : preconditioners) { - { - auto precond_range = 
annotate(precond_name.c_str()); - run_preconditioner(precond_name.c_str(), exec, - system_matrix, b.get(), x.get(), - test_case, allocator); - } - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - backup_results(test_cases); - } - } catch (const std::exception& e) { - std::cerr << "Error setting up preconditioner, what(): " << e.what() - << std::endl; - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } + run_test_cases(PreconditionerBenchmark{}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/solver/distributed/solver.cpp b/benchmark/solver/distributed/solver.cpp index a9b1f9c1c93..d691309ab6a 100644 --- a/benchmark/solver/distributed/solver.cpp +++ b/benchmark/solver/distributed/solver.cpp @@ -52,7 +52,7 @@ struct Generator : public DistributedDefaultSystemGenerator { std::unique_ptr generate_rhs(std::shared_ptr exec, const gko::LinOp* system_matrix, - rapidjson::Value& config) const + json& config) const { return Vec::create( exec, comm, gko::dim<2>{system_matrix->get_size()[0], FLAGS_nrhs}, @@ -82,9 +82,13 @@ int main(int argc, char* argv[]) FLAGS_repetitions = "1"; FLAGS_min_repetitions = 1; + const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); + const auto rank = comm.rank(); + const auto do_print = rank == 0; + std::string header = "A benchmark for measuring Ginkgo's distributed solvers\n"; - std::string format = example_config + R"( + std::string format = solver_example_config + R"( The matrix will either be read from an input file if the filename parameter is given, or generated as a stencil matrix. 
If the filename parameter is given, all processes will read the file and @@ -100,10 +104,7 @@ int main(int argc, char* argv[]) )"; std::string additional_json = R"(,"optimal":{"spmv":"csr-csr"})"; initialize_argument_parsing_matrix(&argc, &argv, header, format, - additional_json); - - const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); - const auto rank = comm.rank(); + additional_json, do_print); auto exec = executor_factory_mpi.at(FLAGS_executor)(comm.get()); @@ -114,8 +115,8 @@ int main(int argc, char* argv[]) "Running " + FLAGS_solvers + " with " + std::to_string(FLAGS_max_iters) + " iterations and residual goal of " + ss_rel_res_goal.str() + "\nThe number of right hand sides is " + - std::to_string(FLAGS_nrhs) + "\n"; - if (rank == 0) { + std::to_string(FLAGS_nrhs); + if (do_print) { print_general_information(extra_information); } @@ -136,17 +137,12 @@ int main(int argc, char* argv[]) "optimal": {"spmv": "csr-csr"}] )" : broadcast_json_input(get_input_stream(), comm); - rapidjson::Document test_cases; - test_cases.Parse(json_input.c_str()); - - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(json_input); - run_solver_benchmarks(exec, get_mpi_timer(exec, comm, FLAGS_gpu_timer), - test_cases, Generator(comm), rank == 0); + run_test_cases(SolverBenchmark{Generator{comm}}, exec, + get_mpi_timer(exec, comm, FLAGS_gpu_timer), test_cases); - if (rank == 0) { - std::cout << test_cases << std::endl; + if (do_print) { + std::cout << std::setw(4) << test_cases << std::endl; } } diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 4efc5558a8e..b656102e5df 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -58,7 +58,7 @@ int main(int argc, char* argv[]) FLAGS_min_repetitions = 1; std::string header = "A benchmark for measuring performance of Ginkgo's solvers.\n"; - std::string format = example_config + R"( + std::string format = solver_example_config + 
R"( "optimal":"spmv" can be one of the recognized spmv formats )"; std::string additional_json = R"(,"optimal":{"spmv":"csr"})"; @@ -72,29 +72,24 @@ int main(int argc, char* argv[]) "Running " + FLAGS_solvers + " with " + std::to_string(FLAGS_max_iters) + " iterations and residual goal of " + ss_rel_res_goal.str() + "\nThe number of right hand sides is " + - std::to_string(FLAGS_nrhs) + "\n"; + std::to_string(FLAGS_nrhs); print_general_information(extra_information); auto exec = get_executor(FLAGS_gpu_timer); - rapidjson::Document test_cases; + json test_cases; if (!FLAGS_overhead) { - rapidjson::IStreamWrapper jcin(get_input_stream()); - test_cases.ParseStream(jcin); + test_cases = json::parse(get_input_stream()); } else { // Fake test case to run once auto overhead_json = std::string() + " [{\"filename\": \"overhead.mtx\", \"optimal\": " "{ \"spmv\": \"csr\"}}]"; - test_cases.Parse(overhead_json.c_str()); + test_cases = json::parse(overhead_json); } - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } - - run_solver_benchmarks(exec, get_timer(exec, FLAGS_gpu_timer), test_cases, - SolverGenerator{}, true); + run_test_cases(SolverBenchmark{SolverGenerator{}}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index ae9ae6dc1fb..4976e5759d4 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -37,8 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/generator.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/preconditioners.hpp" +#include "benchmark/utils/runner.hpp" #ifdef GINKGO_BENCHMARK_ENABLE_TUNING @@ -107,7 +109,7 @@ DEFINE_bool(overhead, false, "If set, uses dummy data to benchmark Ginkgo overhead"); -std::string example_config = R"( +std::string solver_example_config = R"( [ {"filename": "my_file.mtx", "optimal": {"spmv": "ell-csr"}, "rhs": "my_file_rhs.mtx"}, @@ -119,28 +121,6 @@ std::string example_config = R"( )"; -// input validation -[[noreturn]] void print_config_error_and_exit() -{ - std::cerr << "Input has to be a JSON array of solver configurations:\n" - << example_config << std::endl; - std::exit(1); -} - - -void validate_option_object(const rapidjson::Value& value) -{ - if (!value.IsObject() || - !((value.HasMember("size") && value.HasMember("stencil") && - value["size"].IsInt64() && value["stencil"].IsString()) || - (value.HasMember("filename") && value["filename"].IsString())) || - (!value.HasMember("optimal") && !value["optimal"].HasMember("spmv") && - !value["optimal"]["spmv"].IsString())) { - print_config_error_and_exit(); - } -} - - std::shared_ptr create_criterion( std::shared_ptr exec, std::uint32_t max_iters) { @@ -284,21 +264,17 @@ std::unique_ptr generate_solver( } -void write_precond_info(const gko::LinOp* precond, - rapidjson::Value& precond_info, - rapidjson::MemoryPoolAllocator<>& allocator) +void write_precond_info(const gko::LinOp* precond, json& precond_info) { if (const auto jacobi = dynamic_cast*>(precond)) { // extract block sizes const auto bdata = jacobi->get_parameters().block_pointers.get_const_data(); - add_or_set_member(precond_info, "block_sizes", - rapidjson::Value(rapidjson::kArrayType), allocator); + precond_info["block_sizes"] = json::array(); const auto nblocks = 
jacobi->get_num_blocks(); for (auto i = decltype(nblocks){0}; i < nblocks; ++i) { - precond_info["block_sizes"].PushBack(bdata[i + 1] - bdata[i], - allocator); + precond_info["block_sizes"].push_back(bdata[i + 1] - bdata[i]); } // extract block precisions @@ -306,24 +282,19 @@ void write_precond_info(const gko::LinOp* precond, jacobi->get_parameters() .storage_optimization.block_wise.get_const_data(); if (pdata) { - add_or_set_member(precond_info, "block_precisions", - rapidjson::Value(rapidjson::kArrayType), - allocator); + precond_info["block_precisions"] = json::array(); for (auto i = decltype(nblocks){0}; i < nblocks; ++i) { - precond_info["block_precisions"].PushBack( - static_cast(pdata[i]), allocator); + precond_info["block_precisions"].push_back( + static_cast(pdata[i])); } } // extract condition numbers const auto cdata = jacobi->get_conditioning(); if (cdata) { - add_or_set_member(precond_info, "block_conditioning", - rapidjson::Value(rapidjson::kArrayType), - allocator); + precond_info["block_conditioning"] = json::array(); for (auto i = decltype(nblocks){0}; i < nblocks; ++i) { - precond_info["block_conditioning"].PushBack(cdata[i], - allocator); + precond_info["block_conditioning"].push_back(cdata[i]); } } } @@ -335,10 +306,10 @@ struct SolverGenerator : DefaultSystemGenerator<> { std::unique_ptr generate_rhs(std::shared_ptr exec, const gko::LinOp* system_matrix, - rapidjson::Value& config) const + json& config) const { - if (config.HasMember("rhs")) { - std::ifstream rhs_fd{config["rhs"].GetString()}; + if (config.contains("rhs")) { + std::ifstream rhs_fd{config["rhs"].get()}; return gko::read(rhs_fd, std::move(exec)); } else { gko::dim<2> vec_size{system_matrix->get_size()[0], FLAGS_nrhs}; @@ -399,45 +370,112 @@ struct SolverGenerator : DefaultSystemGenerator<> { }; -template -void solve_system(const std::string& solver_name, - const std::string& precond_name, - const char* precond_solver_name, - std::shared_ptr exec, - std::shared_ptr timer, - 
std::shared_ptr system_matrix, - const VectorType* b, const VectorType* x, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& solver_case = test_case["solver"]; - if (!FLAGS_overwrite && solver_case.HasMember(precond_solver_name)) { - return; +template +struct solver_benchmark_state { + using Vec = typename Generator::Vec; + std::shared_ptr system_matrix; + std::unique_ptr b; + std::unique_ptr x; +}; + + +template +struct SolverBenchmark : Benchmark> { + std::string name; + std::vector precond_solvers; + std::map> decoder; + Generator generator; + + SolverBenchmark(Generator generator) : name{"solver"}, generator{generator} + { + auto solvers = split(FLAGS_solvers, ','); + auto preconds = split(FLAGS_preconditioners, ','); + for (const auto& s : solvers) { + for (const auto& p : preconds) { + precond_solvers.push_back(s + (p == "none" ? "" : "-" + p)); + decoder[precond_solvers.back()] = {s, p}; + } + } + } + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return precond_solvers; + } + + bool should_print() const override { return true; } + + std::string get_example_config() const override + { + return solver_example_config; + } + + bool validate_config(const json& value) const override + { + return ((value.contains("size") && value.contains("stencil") && + value["size"].is_number_integer() && + value["stencil"].is_string()) || + (value.contains("filename") && + value["filename"].is_string())) && + (value.contains("optimal") && + value["optimal"].contains("spmv") && + value["optimal"]["spmv"].is_string()); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } + + solver_benchmark_state setup(std::shared_ptr exec, + json& test_case) const override + { + solver_benchmark_state state; + + if (FLAGS_overhead) { + state.system_matrix = generator.initialize({1.0}, exec); + 
state.b = generator.initialize( + {std::numeric_limits::quiet_NaN()}, exec); + state.x = generator.initialize({0.0}, exec); + } else { + state.system_matrix = + generator.generate_matrix_with_optimal_format(exec, test_case); + state.b = generator.generate_rhs(exec, state.system_matrix.get(), + test_case); + state.x = generator.generate_initial_guess( + exec, state.system_matrix.get(), state.b.get()); } - add_or_set_member(solver_case, precond_solver_name, - rapidjson::Value(rapidjson::kObjectType), allocator); - auto& solver_json = solver_case[precond_solver_name]; - add_or_set_member(solver_json, "recurrent_residuals", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(solver_json, "true_residuals", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(solver_json, "implicit_residuals", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(solver_json, "iteration_timestamps", - rapidjson::Value(rapidjson::kArrayType), allocator); - if (b->get_size()[1] == 1 && !FLAGS_overhead) { - auto rhs_norm = compute_norm2(b); - add_or_set_member(solver_json, "rhs_norm", rhs_norm, allocator); + std::clog << "Matrix is of size (" << state.system_matrix->get_size()[0] + << ", " << state.system_matrix->get_size()[1] << ")" + << std::endl; + test_case["rows"] = state.system_matrix->get_size()[0]; + test_case["cols"] = state.system_matrix->get_size()[1]; + return state; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + solver_benchmark_state& state, + const std::string& encoded_solver_name, + json& solver_case) const override + { + const auto decoded_pair = decoder.at(encoded_solver_name); + auto& solver_name = decoded_pair.first; + auto& precond_name = decoded_pair.second; + solver_case["recurrent_residuals"] = json::array(); + solver_case["true_residuals"] = json::array(); + solver_case["implicit_residuals"] = json::array(); + solver_case["iteration_timestamps"] = json::array(); + if 
(state.b->get_size()[1] == 1 && !FLAGS_overhead) { + auto rhs_norm = compute_norm2(state.b.get()); + solver_case["rhs_norm"] = rhs_norm; } for (auto stage : {"generate", "apply"}) { - add_or_set_member(solver_json, stage, - rapidjson::Value(rapidjson::kObjectType), - allocator); - add_or_set_member(solver_json[stage], "components", - rapidjson::Value(rapidjson::kObjectType), - allocator); + solver_case[stage] = json::object(); + solver_case[stage]["components"] = json::object(); } IterationControl ic{timer}; @@ -445,24 +483,24 @@ void solve_system(const std::string& solver_name, // warm run std::shared_ptr solver; for (auto _ : ic.warmup_run()) { - auto x_clone = clone(x); + auto x_clone = clone(state.x); auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_warmup_max_iters) - ->generate(system_matrix); - solver->apply(b, x_clone); + ->generate(state.system_matrix); + solver->apply(state.b, x_clone); exec->synchronize(); } // detail run if (FLAGS_detailed && !FLAGS_overhead) { // slow run, get the time of each functions - auto x_clone = clone(x); + auto x_clone = clone(state.x); { auto gen_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - solver_json["generate"]["components"], allocator, 1); + solver_case["generate"]["components"], 1); exec->add_logger(gen_logger); if (exec != exec->get_master()) { exec->get_master()->add_logger(gen_logger); @@ -471,7 +509,7 @@ void solve_system(const std::string& solver_name, auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_max_iters) - ->generate(system_matrix); + ->generate(state.system_matrix); exec->remove_logger(gen_logger); if (exec != exec->get_master()) { @@ -481,25 +519,22 @@ void solve_system(const std::string& solver_name, if (auto prec = dynamic_cast(solver.get())) { - add_or_set_member(solver_json, "preconditioner", - 
rapidjson::Value(rapidjson::kObjectType), - allocator); + solver_case["preconditioner"] = json::object(); write_precond_info( clone(exec->get_master(), prec->get_preconditioner()).get(), - solver_json["preconditioner"], allocator); + solver_case["preconditioner"]); } { auto apply_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - solver_json["apply"]["components"], allocator, 1); + solver_case["apply"]["components"], 1); exec->add_logger(apply_logger); if (exec != exec->get_master()) { exec->get_master()->add_logger(apply_logger); } - - solver->apply(b, x_clone); + solver->apply(state.b, x_clone); exec->remove_logger(apply_logger); if (exec != exec->get_master()) { @@ -508,17 +543,18 @@ void solve_system(const std::string& solver_name, } // slow run, gets the recurrent and true residuals of each iteration - if (b->get_size()[1] == 1) { - x_clone = clone(x); + if (state.b->get_size()[1] == 1) { + x_clone = clone(state.x); auto res_logger = std::make_shared>( - system_matrix, b, solver_json["recurrent_residuals"], - solver_json["true_residuals"], - solver_json["implicit_residuals"], - solver_json["iteration_timestamps"], allocator); + state.system_matrix, state.b, + solver_case["recurrent_residuals"], + solver_case["true_residuals"], + solver_case["implicit_residuals"], + solver_case["iteration_timestamps"]); solver->add_logger(res_logger); - solver->apply(b, x_clone); + solver->apply(state.b, x_clone); if (!res_logger->has_implicit_res_norms()) { - solver_json.RemoveMember("implicit_residuals"); + solver_case.erase("implicit_residuals"); } } exec->synchronize(); @@ -528,16 +564,16 @@ void solve_system(const std::string& solver_name, auto it_logger = std::make_shared(); auto generate_timer = get_timer(exec, FLAGS_gpu_timer); auto apply_timer = ic.get_timer(); - auto x_clone = clone(x); + auto x_clone = clone(state.x); for (auto status : ic.run(false)) { - x_clone = clone(x); + x_clone = clone(state.x); exec->synchronize(); 
generate_timer->tic(); auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_max_iters) - ->generate(system_matrix); + ->generate(state.system_matrix); generate_timer->toc(); exec->synchronize(); @@ -545,165 +581,33 @@ void solve_system(const std::string& solver_name, solver->add_logger(it_logger); } apply_timer->tic(); - solver->apply(b, x_clone); + solver->apply(state.b, x_clone); apply_timer->toc(); if (ic.get_num_repetitions() == 0) { solver->remove_logger(it_logger); } } - it_logger->write_data(solver_json["apply"], allocator); + it_logger->write_data(solver_case["apply"]); - if (b->get_size()[1] == 1 && !FLAGS_overhead) { + if (state.b->get_size()[1] == 1 && !FLAGS_overhead) { // a solver is considered direct if it didn't log any iterations - if (solver_json["apply"].HasMember("iterations") && - solver_json["apply"]["iterations"].GetInt() == 0) { - auto error = - compute_direct_error(solver.get(), b, x_clone.get()); - add_or_set_member(solver_json, "forward_error", error, - allocator); - } - auto residual = - compute_residual_norm(system_matrix.get(), b, x_clone.get()); - add_or_set_member(solver_json, "residual_norm", residual, - allocator); - } - add_or_set_member(solver_json["generate"], "time", - generate_timer->compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(solver_json["apply"], "time", - apply_timer->compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(solver_json, "repetitions", - apply_timer->get_num_repetitions(), allocator); - - // compute and write benchmark data - add_or_set_member(solver_json, "completed", true, allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["solver"][precond_solver_name], "completed", - false, allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["solver"][precond_solver_name], "error", - msg_value, 
allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -template -void run_solver_benchmarks(std::shared_ptr exec, - std::shared_ptr timer, - rapidjson::Document& test_cases, - const SystemGenerator& system_generator, - bool do_print) -{ - auto solvers = split(FLAGS_solvers, ','); - auto preconds = split(FLAGS_preconditioners, ','); - std::vector precond_solvers; - for (const auto& s : solvers) { - for (const auto& p : preconds) { - precond_solvers.push_back(s + (p == "none" ? "" : "-" + p)); - } - } - - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("solver")) { - test_case.AddMember("solver", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& solver_case = test_case["solver"]; - if (!FLAGS_overwrite && - all_of(begin(precond_solvers), end(precond_solvers), - [&solver_case](const std::string& s) { - return solver_case.HasMember(s.c_str()); - })) { - continue; - } - // annotate the test case - auto test_case_range = - annotate(system_generator.describe_config(test_case)); - - if (do_print) { - std::clog << "Running test case\n" << test_case << std::endl; - } - - using Vec = typename SystemGenerator::Vec; - std::shared_ptr system_matrix; - std::unique_ptr b; - std::unique_ptr x; - if (FLAGS_overhead) { - system_matrix = system_generator.initialize({1.0}, exec); - b = system_generator.initialize( - {std::numeric_limits::quiet_NaN()}, exec); - x = system_generator.initialize({0.0}, exec); - } else { - system_matrix = - system_generator.generate_matrix_with_optimal_format( - exec, test_case); - b = system_generator.generate_rhs(exec, system_matrix.get(), - 
test_case); - x = system_generator.generate_initial_guess( - exec, system_matrix.get(), b.get()); - } - - if (do_print) { - std::clog << "Matrix is of size (" - << system_matrix->get_size()[0] << ", " - << system_matrix->get_size()[1] << ")" << std::endl; - } - add_or_set_member(test_case, "size", system_matrix->get_size()[0], - allocator); - auto precond_solver_name = begin(precond_solvers); - for (const auto& solver_name : solvers) { - auto solver_range = annotate(solver_name.c_str()); - for (const auto& precond_name : preconds) { - if (do_print) { - std::clog - << "\tRunning solver: " << *precond_solver_name - << std::endl; - } - { - auto precond_range = annotate(precond_name.c_str()); - solve_system(solver_name, precond_name, - precond_solver_name->c_str(), exec, timer, - system_matrix, b.get(), x.get(), test_case, - allocator); - } - if (do_print) { - backup_results(test_cases); - } - ++precond_solver_name; - } - } - } catch (const std::exception& e) { - std::cerr << "Error setting up solver, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); + if (solver_case["apply"].contains("iterations") && + solver_case["apply"]["iterations"].get() == 0) { + auto error = compute_direct_error(solver.get(), state.b.get(), + x_clone.get()); + solver_case["forward_error"] = error; } + auto residual = compute_residual_norm(state.system_matrix.get(), + state.b.get(), x_clone.get()); + solver_case["residual_norm"] = residual; } + solver_case["generate"]["time"] = + generate_timer->compute_time(FLAGS_timer_method); + solver_case["apply"]["time"] = + apply_timer->compute_time(FLAGS_timer_method); + solver_case["repetitions"] = apply_timer->get_num_repetitions(); } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } -} +}; #endif // GINKGO_BENCHMARK_SOLVER_SOLVER_COMMON_HPP diff --git 
a/benchmark/sparse_blas/operations.cpp b/benchmark/sparse_blas/operations.cpp index 66e5707c559..2ee766d4f83 100644 --- a/benchmark/sparse_blas/operations.cpp +++ b/benchmark/sparse_blas/operations.cpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/sparse_blas/operations.hpp" -#include "benchmark/utils/json.hpp" #include "core/factorization/elimination_forest.hpp" #include "core/factorization/symbolic.hpp" #include "core/matrix/csr_kernels.hpp" @@ -632,11 +631,9 @@ class SymbolicLuOperation : public BenchmarkOperation { void run() override { gko::factorization::symbolic_lu(mtx_, result_); } - void write_stats(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& allocator) override + void write_stats(json& object) override { - add_or_set_member(object, "factor_nonzeros", - result_->get_num_stored_elements(), allocator); + object["factor_nonzeros"] = result_->get_num_stored_elements(); } private: @@ -680,11 +677,9 @@ class SymbolicCholeskyOperation : public BenchmarkOperation { forest_); } - void write_stats(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& allocator) override + void write_stats(json& object) override { - add_or_set_member(object, "factor_nonzeros", - result_->get_num_stored_elements(), allocator); + object["factor_nonzeros"] = result_->get_num_stored_elements(); } private: diff --git a/benchmark/sparse_blas/operations.hpp b/benchmark/sparse_blas/operations.hpp index 99cf72b8e59..48034eb8a1f 100644 --- a/benchmark/sparse_blas/operations.hpp +++ b/benchmark/sparse_blas/operations.hpp @@ -36,9 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include - - +#include "benchmark/utils/json.hpp" #include "benchmark/utils/types.hpp" @@ -79,9 +77,7 @@ class BenchmarkOperation { /** * Allows the operation to write arbitrary information to the JSON output. 
*/ - virtual void write_stats(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& allocator) - {} + virtual void write_stats(json& object) {} }; diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 8c054709fdf..21df4d9c448 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -47,7 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/sparse_blas/operations.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" +#include "benchmark/utils/iteration_control.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/types.hpp" #include "core/test/utils/matrix_generator.hpp" @@ -74,18 +75,64 @@ DEFINE_bool(validate, false, "against the ReferenceExecutor solution."); -void apply_sparse_blas(const char* operation_name, - std::shared_ptr exec, const Mtx* mtx, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - add_or_set_member(test_case, operation_name, - rapidjson::Value(rapidjson::kObjectType), allocator); +using Generator = DefaultSystemGenerator<>; + + +struct SparseBlasBenchmark : Benchmark> { + std::string name; + std::vector operations; + + SparseBlasBenchmark() + : name{"sparse_blas"}, operations{split(FLAGS_operations)} + {} + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return operations; + } + + bool should_print() const override { return true; } + + bool validate_config(const json& value) const override + { + return Generator::validate_config(value); + } + + std::string get_example_config() const override + { + return Generator::get_example_config(); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } + + std::unique_ptr 
setup(std::shared_ptr exec, + json& test_case) const override + { + auto data = Generator::generate_matrix_data(test_case); + data.ensure_row_major_order(); + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + + auto mtx = Mtx::create(exec, data.size, data.nonzeros.size()); + mtx->read(data); + return mtx; + } + - auto op = get_operation(operation_name, mtx); + void run(std::shared_ptr exec, std::shared_ptr timer, + std::unique_ptr& mtx, const std::string& operation_name, + json& operation_case) const override + { + auto op = get_operation(operation_name, mtx.get()); - auto timer = get_timer(exec, FLAGS_gpu_timer); IterationControl ic(timer); // warm run @@ -105,54 +152,30 @@ void apply_sparse_blas(const char* operation_name, const auto flops = static_cast(op->get_flops()); const auto mem = static_cast(op->get_memory()); const auto repetitions = ic.get_num_repetitions(); - add_or_set_member(test_case[operation_name], "time", runtime, - allocator); - add_or_set_member(test_case[operation_name], "flops", flops / runtime, - allocator); - add_or_set_member(test_case[operation_name], "bandwidth", mem / runtime, - allocator); - add_or_set_member(test_case[operation_name], "repetitions", repetitions, - allocator); + operation_case["time"] = runtime; + operation_case["flops"] = flops / runtime; + operation_case["bandwidth"] = mem / runtime; + operation_case["repetitions"] = repetitions; if (FLAGS_validate) { auto validation_result = op->validate(); - add_or_set_member(test_case[operation_name], "correct", - validation_result.first, allocator); - add_or_set_member(test_case[operation_name], "error", - validation_result.second, allocator); + operation_case["correct"] = validation_result.first; + operation_case["error"] = validation_result.second; } if (FLAGS_detailed) { - 
add_or_set_member(test_case[operation_name], "components", - rapidjson::Value(rapidjson::kObjectType), - allocator); + operation_case["components"] = json::object(); auto gen_logger = create_operations_logger( FLAGS_gpu_timer, FLAGS_nested_names, exec, - test_case[operation_name]["components"], allocator, - repetitions); + operation_case["components"], repetitions); exec->add_logger(gen_logger); for (unsigned i = 0; i < repetitions; i++) { op->run(); } exec->remove_logger(gen_logger); } - op->write_stats(test_case[operation_name], allocator); - - add_or_set_member(test_case[operation_name], "completed", true, - allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case[operation_name], "completed", false, - allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case[operation_name], "error", msg_value, - allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; + op->write_stats(operation_case); } -} +}; int main(int argc, char* argv[]) @@ -160,86 +183,18 @@ int main(int argc, char* argv[]) std::string header = "A benchmark for measuring performance of Ginkgo's sparse BLAS " "operations.\n"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing_matrix(&argc, &argv, header, format); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(get_input_stream()); std::string extra_information = "The operations are " + FLAGS_operations; print_general_information(extra_information); - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - 
exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - - auto operations = split(FLAGS_operations, ','); - - DefaultSystemGenerator<> generator{}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember(benchmark_name)) { - test_case.AddMember(rapidjson::Value(benchmark_name, allocator), - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& sp_blas_case = test_case[benchmark_name]; - std::clog << "Running test case\n" << test_case << std::endl; - auto data = generator.generate_matrix_data(test_case); - data.ensure_row_major_order(); - std::clog << "Matrix is of size (" << data.size[0] << ", " - << data.size[1] << "), " << data.nonzeros.size() - << std::endl; - add_or_set_member(test_case, "rows", data.size[0], allocator); - add_or_set_member(test_case, "cols", data.size[1], allocator); - add_or_set_member(test_case, "nonzeros", data.nonzeros.size(), - allocator); - - auto mtx = Mtx::create(exec, data.size, data.nonzeros.size()); - mtx->read(data); - // annotate the test case - auto test_case_range = - annotate(generator.describe_config(test_case)); - for (const auto& operation_name : operations) { - if (FLAGS_overwrite || - !sp_blas_case.HasMember(operation_name.c_str())) { - { - auto operation_range = annotate(operation_name.c_str()); - apply_sparse_blas(operation_name.c_str(), exec, - mtx.get(), sp_blas_case, allocator); - } - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - backup_results(test_cases); - } - } - // write the output if we have no strategies - backup_results(test_cases); - } catch (const std::exception& e) { - std::cerr << "Error setting up matrix data, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); - } - } - } - if (profiler_hook) 
{ - exec->remove_logger(profiler_hook); - } + run_test_cases(SparseBlasBenchmark{}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/spmv/distributed/spmv.cpp b/benchmark/spmv/distributed/spmv.cpp index 9b7e4ad8c8f..202aad15c7e 100644 --- a/benchmark/spmv/distributed/spmv.cpp +++ b/benchmark/spmv/distributed/spmv.cpp @@ -58,38 +58,7 @@ DEFINE_string(non_local_formats, "csr", "run. See the 'formats' option for a list of supported versions"); -std::string example_config = R"( - [ - {"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}, - {"filename": "my_file.mtx"} - ] -)"; - - -[[noreturn]] void print_config_error_and_exit() -{ - std::cerr << "Input has to be a JSON array of matrix configurations:\n" - << example_config << std::endl; - std::exit(1); -} - - -struct Generator : DistributedDefaultSystemGenerator> { - Generator(gko::experimental::mpi::communicator comm) - : DistributedDefaultSystemGenerator>{ - std::move(comm), {}} - {} - - void validate_options(const rapidjson::Value& options) const - { - if (!options.IsObject() || - !((options.HasMember("size") && options.HasMember("stencil") && - options.HasMember("comm_pattern")) || - options.HasMember("filename"))) { - print_config_error_and_exit(); - } - } -}; +using Generator = DistributedDefaultSystemGenerator>; int main(int argc, char* argv[]) @@ -98,18 +67,19 @@ int main(int argc, char* argv[]) const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); const auto rank = comm.rank(); + const auto do_print = rank == 0; std::string header = "A benchmark for measuring performance of Ginkgo's spmv.\n"; - std::string format = example_config; - initialize_argument_parsing_matrix(&argc, &argv, header, format); - - if (rank == 0) { - std::string extra_information = "The formats are [" + - FLAGS_local_formats + "]x[" + - FLAGS_non_local_formats + "]\n" + - "The number of right 
hand sides is " + - std::to_string(FLAGS_nrhs) + "\n"; + std::string format = Generator::get_example_config(); + initialize_argument_parsing_matrix(&argc, &argv, header, format, "", + do_print); + + if (do_print) { + std::string extra_information = + "The formats are [" + FLAGS_local_formats + "]x[" + + FLAGS_non_local_formats + "]\n" + + "The number of right hand sides is " + std::to_string(FLAGS_nrhs); print_general_information(extra_information); } @@ -125,16 +95,13 @@ int main(int argc, char* argv[]) } std::string json_input = broadcast_json_input(get_input_stream(), comm); - rapidjson::Document test_cases; - test_cases.Parse(json_input.c_str()); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(json_input); - run_spmv_benchmark(exec, test_cases, formats, Generator{comm}, - get_mpi_timer(exec, comm, FLAGS_gpu_timer), rank == 0); + run_test_cases(SpmvBenchmark{Generator{comm}, formats, do_print}, + exec, get_mpi_timer(exec, comm, FLAGS_gpu_timer), + test_cases); - if (rank == 0) { - std::cout << test_cases << std::endl; + if (do_print) { + std::cout << std::setw(4) << test_cases << std::endl; } } diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index 034437907c8..abd1b783019 100644 --- a/benchmark/spmv/spmv.cpp +++ b/benchmark/spmv/spmv.cpp @@ -41,48 +41,29 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/formats.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" -struct Generator : DefaultSystemGenerator<> { - void validate_options(const rapidjson::Value& options) const - { - if (!options.IsObject() || - !((options.HasMember("size") && options.HasMember("stencil")) || - options.HasMember("filename"))) { - std::cerr - << "Input has to be a JSON array of matrix configurations:\n" - << example_config << std::endl; - std::exit(1); - } - } -}; +using Generator = DefaultSystemGenerator<>; int main(int argc, char* argv[]) { std::string header = "A benchmark for measuring performance of Ginkgo's spmv.\n"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing_matrix(&argc, &argv, header, format); std::string extra_information = "The formats are " + FLAGS_formats + "\nThe number of right hand sides is " + - std::to_string(FLAGS_nrhs) + "\n"; + std::to_string(FLAGS_nrhs); print_general_information(extra_information); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - auto formats = split(FLAGS_formats, ','); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(get_input_stream()); - run_spmv_benchmark(exec, test_cases, formats, Generator{}, - get_timer(exec, FLAGS_gpu_timer), true); + run_test_cases(SpmvBenchmark{Generator{}, split(FLAGS_formats)}, + exec, get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index 3c8d886df3b..4a7d014de8b 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -36,7 +36,9 @@ OF THIS 
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" #ifdef GINKGO_BENCHMARK_ENABLE_TUNING @@ -48,57 +50,119 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. DEFINE_uint32(nrhs, 1, "The number of right hand sides"); -// This function supposes that management of `FLAGS_overwrite` is done before -// calling it -template -void apply_spmv(const char* format_name, std::shared_ptr exec, - const Generator& generator, std::shared_ptr timer, - const gko::matrix_data& data, - const VectorType* b, const VectorType* x, - const VectorType* answer, rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& spmv_case = test_case["spmv"]; - add_or_set_member(spmv_case, format_name, - rapidjson::Value(rapidjson::kObjectType), allocator); +template +struct spmv_benchmark_state { + gko::matrix_data data; + std::unique_ptr x; + std::unique_ptr b; + std::unique_ptr answer; +}; + + +template +struct SpmvBenchmark : Benchmark> { + using Vec = typename Generator::Vec; + std::string name; + std::vector formats; + bool do_print; + Generator generator; + + SpmvBenchmark(Generator generator, std::vector formats, + bool do_print = true) + : name{"spmv"}, + formats{std::move(formats)}, + generator{generator}, + do_print{do_print} + {} + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return formats; + } + + bool should_print() const override { return do_print; } + std::string get_example_config() const override + { + return generator.get_example_config(); + } + + bool validate_config(const json& test_case) const override + { + return generator.validate_config(test_case); + } + + 
std::string describe_config(const json& test_case) const override + { + return generator.describe_config(test_case); + } + + spmv_benchmark_state setup(std::shared_ptr exec, + json& test_case) const override + { + spmv_benchmark_state state; + state.data = generator.generate_matrix_data(test_case); + + auto nrhs = FLAGS_nrhs; + state.b = generator.create_multi_vector_random( + exec, gko::dim<2>{state.data.size[1], nrhs}); + state.x = generator.create_multi_vector_random( + exec, gko::dim<2>{state.data.size[0], nrhs}); + if (do_print) { + std::clog << "Matrix is of size (" << state.data.size[0] << ", " + << state.data.size[1] << "), " + << state.data.nonzeros.size() << std::endl; + } + test_case["rows"] = state.data.size[0]; + test_case["cols"] = state.data.size[1]; + test_case["nonzeros"] = state.data.nonzeros.size(); + if (FLAGS_detailed) { + state.answer = gko::clone(state.x); + auto system_matrix = + generator.generate_matrix_with_default_format(exec, state.data); + exec->synchronize(); + system_matrix->apply(state.b, state.answer); + exec->synchronize(); + } + return state; + } + + void run(std::shared_ptr exec, std::shared_ptr timer, + spmv_benchmark_state& state, + const std::string& format_name, json& format_case) const override + { auto system_matrix = generator.generate_matrix_with_format( - exec, format_name, data, &spmv_case[format_name], &allocator); + exec, format_name, state.data, &format_case); // check the residual if (FLAGS_detailed) { - auto x_clone = clone(x); + auto x_clone = clone(state.x); exec->synchronize(); - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); exec->synchronize(); auto max_relative_norm2 = - compute_max_relative_norm2(x_clone.get(), answer); - add_or_set_member(spmv_case[format_name], "max_relative_norm2", - max_relative_norm2, allocator); + compute_max_relative_norm2(x_clone.get(), state.answer.get()); + format_case["max_relative_norm2"] = max_relative_norm2; } IterationControl ic{timer}; // warm 
run for (auto _ : ic.warmup_run()) { - auto x_clone = clone(x); + auto x_clone = clone(state.x); exec->synchronize(); - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); exec->synchronize(); } // tuning run #ifdef GINKGO_BENCHMARK_ENABLE_TUNING auto& format_case = spmv_case[format_name]; - if (!format_case.HasMember("tuning")) { - format_case.AddMember( - "tuning", rapidjson::Value(rapidjson::kObjectType), allocator); - } + format_case["tuning"] = json::object(); auto& tuning_case = format_case["tuning"]; - add_or_set_member(tuning_case, "time", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(tuning_case, "values", - rapidjson::Value(rapidjson::kArrayType), allocator); + tuning_case["time"] = json::array(); + tuning_case["values"] = json::array(); // Enable tuning for this portion of code gko::_tuning_flag = true; @@ -112,13 +176,13 @@ void apply_spmv(const char* format_name, std::shared_ptr exec, gko::_tuned_value = val; auto tuning_timer = get_timer(exec, FLAGS_gpu_timer); IterationControl ic_tuning{tuning_timer}; - auto x_clone = clone(x); + auto x_clone = clone(state.x); for (auto _ : ic_tuning.run()) { - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); } - tuning_case["time"].PushBack( - ic_tuning.compute_time(FLAGS_timer_method), allocator); - tuning_case["values"].PushBack(val, allocator); + tuning_case["time"].push_back( + ic_tuning.compute_time(FLAGS_timer_method)); + tuning_case["values"].push_back(val); } // We put back the flag to false to use the default (non-tuned) values // for the following @@ -126,142 +190,41 @@ void apply_spmv(const char* format_name, std::shared_ptr exec, #endif // GINKGO_BENCHMARK_ENABLE_TUNING // timed run - auto x_clone = clone(x); + auto x_clone = clone(state.x); for (auto _ : ic.run()) { - system_matrix->apply(b, x_clone); - } - add_or_set_member(spmv_case[format_name], "time", - ic.compute_time(FLAGS_timer_method), allocator); - 
add_or_set_member(spmv_case[format_name], "repetitions", - ic.get_num_repetitions(), allocator); - - // compute and write benchmark data - add_or_set_member(spmv_case[format_name], "completed", true, allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["spmv"][format_name], "completed", false, - allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["spmv"][format_name], "error", - msg_value, allocator); + system_matrix->apply(state.b, x_clone); } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; + format_case["time"] = ic.compute_time(FLAGS_timer_method); + format_case["repetitions"] = ic.get_num_repetitions(); } -} - - -template -void run_spmv_benchmark(std::shared_ptr exec, - rapidjson::Document& test_cases, - const std::vector formats, - const SystemGenerator& system_generator, - std::shared_ptr timer, bool do_print) -{ - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = annotate_functor{profiler_hook}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - system_generator.validate_options(test_case); - if (!test_case.HasMember("spmv")) { - test_case.AddMember("spmv", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& spmv_case = test_case["spmv"]; - if (!FLAGS_overwrite && - all_of(begin(formats), end(formats), - [&spmv_case](const std::string& s) { - return spmv_case.HasMember(s.c_str()); - })) { - continue; - } - if (do_print) { - std::clog << "Running test case\n" << test_case << std::endl; - } - // annotate the test case - auto test_case_range = - annotate(system_generator.describe_config(test_case)); - - auto data = system_generator.generate_matrix_data(test_case); - - auto nrhs = FLAGS_nrhs; - auto b = 
system_generator.create_multi_vector_random( - exec, gko::dim<2>{data.size[1], nrhs}); - auto x = system_generator.create_multi_vector_random( - exec, gko::dim<2>{data.size[0], nrhs}); - if (do_print) { - std::clog << "Matrix is of size (" << data.size[0] << ", " - << data.size[1] << ")" << std::endl; - } - add_or_set_member(test_case, "size", data.size[0], allocator); - add_or_set_member(test_case, "nnz", data.nonzeros.size(), - allocator); - auto best_performance = std::numeric_limits::max(); - if (!test_case.HasMember("optimal")) { - test_case.AddMember("optimal", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - // Compute the result from ginkgo::coo as the correct answer - auto answer = gko::clone(x); - if (FLAGS_detailed) { - auto system_matrix = - system_generator.generate_matrix_with_default_format(exec, - data); - exec->synchronize(); - system_matrix->apply(b, answer); - exec->synchronize(); + void postprocess(json& test_case) const override + { + if (!test_case.contains("optimal")) { + test_case["optimal"] = json::object(); + } + auto best_time = std::numeric_limits::max(); + std::string best_format; + // find the fastest among all formats we tested + for (const auto& format : formats) { + if (!test_case[name].contains(format)) { + continue; } - for (const auto& format_name : formats) { - { - auto format_range = annotate(format_name.c_str()); - apply_spmv(format_name.c_str(), exec, system_generator, - timer, data, b.get(), x.get(), answer.get(), - test_case, allocator); - } - if (do_print) { - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - } - if (spmv_case[format_name.c_str()]["completed"].GetBool()) { - auto performance = - spmv_case[format_name.c_str()]["time"].GetDouble(); - if (performance < best_performance) { - best_performance = performance; - add_or_set_member( - test_case["optimal"], "spmv", - rapidjson::Value(format_name.c_str(), allocator) - .Move(), - allocator); - } - } - if (do_print) { - 
backup_results(test_cases); + auto& format_case = test_case[name][format]; + if (format_case.contains("completed") && + format_case["completed"].template get()) { + auto time = format_case["time"]; + if (time < best_time) { + best_time = time; + best_format = format; } } - } catch (const std::exception& e) { - std::cerr << "Error setting up matrix data, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); - } + } + if (!best_format.empty()) { + test_case["optimal"][name] = best_format; } } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } -} +}; + #endif // GINKGO_BENCHMARK_SPMV_SPMV_COMMON_HPP diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index abc496b0921..b64f4321287 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -10,6 +10,7 @@ Running test case "blas": {} } DEBUG: begin n = 100 + Running blas: copy DEBUG: begin copy DEBUG: begin allocate DEBUG: end allocate @@ -24,21 +25,7 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end copy -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] + Running blas: axpy DEBUG: begin axpy DEBUG: begin allocate DEBUG: end allocate @@ -61,28 +48,7 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end axpy -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] + Running blas: scal DEBUG: begin scal DEBUG: begin allocate DEBUG: end allocate @@ -99,33 +65,4 @@ DEBUG: end free DEBUG: begin free DEBUG: end 
free DEBUG: end scal -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] DEBUG: end n = 100 diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr index 9508b0dcf1e..f41b25c6ee1 100644 --- a/benchmark/test/reference/blas.simple.stderr +++ b/benchmark/test/reference/blas.simple.stderr @@ -9,69 +9,6 @@ Running test case "n": 100, "blas": {} } -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] + Running blas: copy + Running blas: axpy + Running blas: scal diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index 9ab8a899649..1d5df7477ba 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -4,1853 +4,23 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides 
is 42 The formats are coo,csr,ell,sellp,hybrid -Benchmarking conversions. Running test case { "size": 100, "stencil": "7pt", - "conversions": {} -} -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - 
"coo-hybrid": { - "completed": false, - "error": "" - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - 
"completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - 
"completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - 
"completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": 
{ - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - 
"completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 
125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - 
"error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 
10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": 
false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - 
}, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - } - } - } -] -Error when processing test case -{ - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - 
"completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - }, - "hybrid-sellp": { - "completed": false, - "error": "" - } - } -} -what(): -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" - }, - "coo-hybrid": { - "completed": false, - "error": "" - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-ell": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-sellp": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-hybrid": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "ell-coo": { - "completed": false, - "error": "" - }, - "ell-csr": { - "time": 1.0, - "repetitions": 10, - "completed": 
true - }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" - }, - "sellp-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" - }, - "hybrid-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - }, - "hybrid-sellp": { - "completed": false, - "error": "" - } - } - } -] + "conversion": {} +} +Matrix is of size (125, 125), 725 + Running conversion: coo-read + Running conversion: coo-csr + Running conversion: csr-read + Running conversion: csr-coo + Running conversion: csr-ell + Running conversion: csr-sellp + Running conversion: csr-hybrid + Running conversion: ell-read + Running conversion: ell-csr + Running conversion: sellp-read + Running conversion: sellp-csr + Running conversion: hybrid-read + Running conversion: hybrid-csr diff --git a/benchmark/test/reference/conversion.all.stdout b/benchmark/test/reference/conversion.all.stdout index cb53bb81a6c..c4b657a42c4 100644 --- a/benchmark/test/reference/conversion.all.stdout +++ b/benchmark/test/reference/conversion.all.stdout @@ -1,25 +1,23 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", - "conversions": { - "coo-csr": { + "conversion": { + "coo-read": { "time": 1.0, "repetitions": 10, "completed": true }, - "coo-ell": { - "completed": false, - "error": "" - }, - "coo-sellp": { - "completed": false, - "error": "" + "coo-csr": { + "time": 1.0, + "repetitions": 10, + "completed": true }, - "coo-hybrid": { - "completed": false, - "error": "" + "csr-read": { + "time": 1.0, + "repetitions": 10, + "completed": true }, "csr-coo": { "time": 1.0, @@ -41,57 +39,39 @@ "repetitions": 10, "completed": true }, - "ell-coo": { - "completed": false, - 
"error": "" + "ell-read": { + "time": 1.0, + "repetitions": 10, + "completed": true }, "ell-csr": { "time": 1.0, "repetitions": 10, "completed": true }, - "ell-sellp": { - "completed": false, - "error": "" - }, - "ell-hybrid": { - "completed": false, - "error": "" - }, - "sellp-coo": { - "completed": false, - "error": "" + "sellp-read": { + "time": 1.0, + "repetitions": 10, + "completed": true }, "sellp-csr": { "time": 1.0, "repetitions": 10, "completed": true }, - "sellp-ell": { - "completed": false, - "error": "" - }, - "sellp-hybrid": { - "completed": false, - "error": "" - }, - "hybrid-coo": { - "completed": false, - "error": "" + "hybrid-read": { + "time": 1.0, + "repetitions": 10, + "completed": true }, "hybrid-csr": { "time": 1.0, "repetitions": 10, "completed": true - }, - "hybrid-ell": { - "completed": false, - "error": "" - }, - "hybrid-sellp": { - "completed": false, - "error": "" } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/conversion.matrix.stderr b/benchmark/test/reference/conversion.matrix.stderr index 1d604175479..369a363a53e 100644 --- a/benchmark/test/reference/conversion.matrix.stderr +++ b/benchmark/test/reference/conversion.matrix.stderr @@ -4,43 +4,13 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Benchmarking conversions. 
Running test case { "filename": "", - "conversions": {} + "conversion": {} } -Matrix is of size (36, 36) -Current state: -[ - { - "filename": "", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "size": 36 - } -] -Current state: -[ - { - "filename": "", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "size": 36 - } -] +Matrix is of size (36, 36), 208 + Running conversion: coo-read + Running conversion: coo-csr + Running conversion: csr-read + Running conversion: csr-coo diff --git a/benchmark/test/reference/conversion.matrix.stdout b/benchmark/test/reference/conversion.matrix.stdout index e43edda0595..7e537fa4919 100644 --- a/benchmark/test/reference/conversion.matrix.stdout +++ b/benchmark/test/reference/conversion.matrix.stdout @@ -2,18 +2,30 @@ [ { "filename": "", - "conversions": { + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, "coo-csr": { "time": 1.0, "repetitions": 10, "completed": true }, + "csr-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, "csr-coo": { "time": 1.0, "repetitions": 10, "completed": true } }, - "size": 36 + "rows": 36, + "cols": 36, + "nonzeros": 208 } ] diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 6733472be8f..089e6be02f9 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -4,15 +4,16 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Benchmarking conversions. 
Running test case { "size": 100, "stencil": "7pt", - "conversions": {} + "conversion": {} } -Matrix is of size (125, 125) -DEBUG: begin stencil(125,7pt) +Matrix is of size (125, 125), 725 +DEBUG: begin stencil(100,7pt) + Running conversion: coo-read +DEBUG: begin coo-read DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -21,13 +22,17 @@ DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end coo-read + Running conversion: coo-csr DEBUG: begin coo-csr DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::fill_array -DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -36,12 +41,8 @@ DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin components::convert_idxs_to_ptrs -DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free +DEBUG: begin components::fill_array +DEBUG: end components::fill_array DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate @@ -49,14 +50,10 @@ DEBUG: begin free DEBUG: end free DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs @@ -68,27 +65,15 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end coo-csr -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free 
+DEBUG: end coo-csr + Running conversion: csr-read +DEBUG: begin csr-read DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array @@ -109,32 +94,46 @@ DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin free DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end csr-read + Running conversion: csr-coo DEBUG: begin csr-coo DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin copy() DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_ptrs_to_idxs DEBUG: end components::convert_ptrs_to_idxs DEBUG: end copy() @@ -144,30 +143,11 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end csr-coo -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 1, - "completed": true - }, - "csr-coo": { - "time": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(125,7pt) +DEBUG: end csr-coo +DEBUG: end stencil(100,7pt) diff --git 
a/benchmark/test/reference/conversion.profile.stdout b/benchmark/test/reference/conversion.profile.stdout index 3e76bc26934..b29815f6c17 100644 --- a/benchmark/test/reference/conversion.profile.stdout +++ b/benchmark/test/reference/conversion.profile.stdout @@ -1,19 +1,32 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", - "conversions": { + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 1, + "completed": true + }, "coo-csr": { "time": 1.0, "repetitions": 1, "completed": true }, + "csr-read": { + "time": 1.0, + "repetitions": 1, + "completed": true + }, "csr-coo": { "time": 1.0, "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/conversion.simple.stderr b/benchmark/test/reference/conversion.simple.stderr index d221ead12a4..a814dba6888 100644 --- a/benchmark/test/reference/conversion.simple.stderr +++ b/benchmark/test/reference/conversion.simple.stderr @@ -4,44 +4,14 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Benchmarking conversions. 
Running test case { "size": 100, "stencil": "7pt", - "conversions": {} + "conversion": {} } -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "conversions": { - "coo-csr": { - "time": 1.0, - "repetitions": 10, - "completed": true - }, - "csr-coo": { - "time": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] +Matrix is of size (125, 125), 725 + Running conversion: coo-read + Running conversion: coo-csr + Running conversion: csr-read + Running conversion: csr-coo diff --git a/benchmark/test/reference/conversion.simple.stdout b/benchmark/test/reference/conversion.simple.stdout index 9ecdd46f5e1..856f1330eea 100644 --- a/benchmark/test/reference/conversion.simple.stdout +++ b/benchmark/test/reference/conversion.simple.stdout @@ -1,19 +1,32 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", - "conversions": { + "conversion": { + "coo-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, "coo-csr": { "time": 1.0, "repetitions": 10, "completed": true }, + "csr-read": { + "time": 1.0, + "repetitions": 10, + "completed": true + }, "csr-coo": { "time": 1.0, "repetitions": 10, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/distributed_solver.matrix.stdout b/benchmark/test/reference/distributed_solver.matrix.stdout index 34fdda13e55..cd3c7b8bd43 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stdout +++ b/benchmark/test/reference/distributed_solver.matrix.stdout @@ -52,6 +52,7 @@ "completed": true } }, - "size": 36 + "rows": 36, + "cols": 36 } ] diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index efd79f66dc5..e583a1411a8 100644 --- 
a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -5,7 +5,6 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -DEBUG: begin stencil(100,7pt,stencil) Running test case { "size": 100, @@ -213,9 +212,9 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() Matrix is of size (125, 125) -DEBUG: begin cg +DEBUG: begin stencil(100,7pt,stencil) Running solver: cg -DEBUG: begin none +DEBUG: begin cg DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -670,8 +669,8 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end none DEBUG: end cg +DEBUG: end stencil(100,7pt,stencil) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -686,4 +685,3 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt,stencil) diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout index c61541a5d5b..aef92652256 100644 --- a/benchmark/test/reference/distributed_solver.profile.stdout +++ b/benchmark/test/reference/distributed_solver.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": { @@ -27,6 +27,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/distributed_solver.simple.stdout b/benchmark/test/reference/distributed_solver.simple.stdout index 54d7233ba77..002b9d91347 100644 --- a/benchmark/test/reference/distributed_solver.simple.stdout +++ b/benchmark/test/reference/distributed_solver.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": { @@ -53,6 +53,8 @@ 
"repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/matrix_statistics.matrix.stderr b/benchmark/test/reference/matrix_statistics.matrix.stderr index af205c778c0..7bb33842f25 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stderr +++ b/benchmark/test/reference/matrix_statistics.matrix.stderr @@ -5,4 +5,4 @@ Running test case "filename": "", "problem": {} } -Matrix is of size (36, 36) +Matrix is of size (36, 36), 208 diff --git a/benchmark/test/reference/matrix_statistics.matrix.stdout b/benchmark/test/reference/matrix_statistics.matrix.stdout index a056241669b..ea73587fde4 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stdout +++ b/benchmark/test/reference/matrix_statistics.matrix.stdout @@ -33,6 +33,8 @@ "hyperflatness": 6.0545648993883665 } }, - "size": 36 + "rows": 36, + "cols": 36, + "nonzeros": 208 } ] diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr index 6b853c3f4ea..75a7cca709f 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -6,4 +6,4 @@ Running test case "stencil": "7pt", "problem": {} } -Matrix is of size (125, 125) +Matrix is of size (125, 125), 725 diff --git a/benchmark/test/reference/matrix_statistics.simple.stdout b/benchmark/test/reference/matrix_statistics.simple.stdout index 4470784e7c5..13746ce8a46 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stdout +++ b/benchmark/test/reference/matrix_statistics.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "problem": { "rows": 125, @@ -33,6 +33,9 @@ "hyperskewness": -1.741577812922432, "hyperflatness": 7.762345679012379 } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/preconditioner.matrix.stderr 
b/benchmark/test/reference/preconditioner.matrix.stderr index c9ef583d79e..4088a20c925 100644 --- a/benchmark/test/reference/preconditioner.matrix.stderr +++ b/benchmark/test/reference/preconditioner.matrix.stderr @@ -9,34 +9,5 @@ Running test case "filename": "", "preconditioner": {} } -Matrix is of size (36, 36) -Current state: -[ - { - "filename": "", - "preconditioner": { - "none": { - "generate": { - "components": { - "generate()": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "apply": { - "components": { - "apply()": 1.0, - "copy()": 1.0, - "dense::copy": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "completed": true - } - }, - "size": 36 - } -] +Matrix is of size (36, 36), 208 + Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner.matrix.stdout b/benchmark/test/reference/preconditioner.matrix.stdout index 77979f4c54b..0415a87ea8d 100644 --- a/benchmark/test/reference/preconditioner.matrix.stdout +++ b/benchmark/test/reference/preconditioner.matrix.stdout @@ -25,6 +25,8 @@ "completed": true } }, - "size": 36 + "rows": 36, + "cols": 36, + "nonzeros": 208 } ] diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index 5b47bc9bd94..c215b22c925 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -10,7 +10,6 @@ Running test case "stencil": "7pt", "preconditioner": {} } -DEBUG: begin stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array @@ -59,7 +58,9 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -Matrix is of size (125, 125) +Matrix is of size (125, 125), 725 +DEBUG: begin stencil(100,7pt) + Running preconditioner: none DEBUG: begin none DEBUG: begin copy() DEBUG: begin allocate @@ -78,28 +79,7 @@ DEBUG: end apply() 
DEBUG: begin free DEBUG: end free DEBUG: end none -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "preconditioner": { - "none": { - "generate": { - "components": {}, - "time": 1.0, - "repetitions": 1 - }, - "apply": { - "components": {}, - "time": 1.0, - "repetitions": 1 - }, - "completed": true - } - } - } -] +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -110,4 +90,3 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout index cc73c4c4552..f53407d818d 100644 --- a/benchmark/test/reference/preconditioner.profile.stdout +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "preconditioner": { "none": { @@ -17,6 +17,9 @@ }, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr index d480d4fedbd..07d2cca6704 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -10,34 +10,5 @@ Running test case "stencil": "7pt", "preconditioner": {} } -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "preconditioner": { - "none": { - "generate": { - "components": { - "generate()": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "apply": { - "components": { - "apply()": 1.0, - "copy()": 1.0, - "dense::copy": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "completed": true - } - } - } -] +Matrix is of size (125, 125), 725 + Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner.simple.stdout b/benchmark/test/reference/preconditioner.simple.stdout index 
c47146a72e1..92bb51ddb57 100644 --- a/benchmark/test/reference/preconditioner.simple.stdout +++ b/benchmark/test/reference/preconditioner.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "preconditioner": { "none": { @@ -25,6 +25,9 @@ }, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/solver.matrix.stdout b/benchmark/test/reference/solver.matrix.stdout index 6a1f8ceb959..56577288c2d 100644 --- a/benchmark/test/reference/solver.matrix.stdout +++ b/benchmark/test/reference/solver.matrix.stdout @@ -50,6 +50,7 @@ "completed": true } }, - "size": 36 + "rows": 36, + "cols": 36 } ] diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 65b7560d936..0c3f7060796 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -5,7 +5,6 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -DEBUG: begin stencil(100,7pt) Running test case { "size": 100, @@ -62,9 +61,9 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() Matrix is of size (125, 125) -DEBUG: begin cg +DEBUG: begin stencil(100,7pt) Running solver: cg -DEBUG: begin none +DEBUG: begin cg DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -425,8 +424,8 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end none DEBUG: end cg +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -437,4 +436,3 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout index 128a8a1f169..0148e6ef092 100644 --- 
a/benchmark/test/reference/solver.profile.stdout +++ b/benchmark/test/reference/solver.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "optimal": { "spmv": "csr" @@ -26,6 +26,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/solver.simple.stdout b/benchmark/test/reference/solver.simple.stdout index c6055339d67..b4e7b56b2bf 100644 --- a/benchmark/test/reference/solver.simple.stdout +++ b/benchmark/test/reference/solver.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "optimal": { "spmv": "csr" @@ -50,6 +50,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/sparse_blas.matrix.stderr b/benchmark/test/reference/sparse_blas.matrix.stderr index 5001c604e72..ff52b6a3269 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stderr +++ b/benchmark/test/reference/sparse_blas.matrix.stderr @@ -3,34 +3,11 @@ This is Ginkgo 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 -The operations are transposeRunning test case +The operations are transpose +Running test case { "filename": "", "sparse_blas": {} } Matrix is of size (36, 36), 208 -Current state: -[ - { - "filename": "", - "sparse_blas": { - "transpose": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "components": { - "allocate": 1.0, - "components::fill_array": 1.0, - "csr::transpose": 1.0, - "free": 1.0, - "overhead": 1.0 - }, - "completed": true - } - }, - "rows": 36, - "cols": 36, - "nonzeros": 208 - } -] + Running sparse_blas: transpose diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index d05f5117b8e..d1434dad146 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ 
b/benchmark/test/reference/sparse_blas.profile.stderr @@ -3,7 +3,8 @@ This is Ginkgo 1.7.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 -The operations are transposeRunning test case +The operations are transpose +Running test case { "size": 100, "stencil": "7pt", @@ -35,6 +36,7 @@ DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin free DEBUG: end free DEBUG: begin stencil(100,7pt) + Running sparse_blas: transpose DEBUG: begin transpose DEBUG: begin allocate DEBUG: end allocate @@ -53,25 +55,6 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end transpose -Current state: -[ - { - "size": 100, - "stencil": "7pt", - "sparse_blas": { - "transpose": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - }, - "rows": 125, - "cols": 125, - "nonzeros": 725 - } -] DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr index bf5001f67b7..452374a9268 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -3,36 +3,12 @@ This is Ginkgo 1.7.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 -The operations are transposeRunning test case +The operations are transpose +Running test case { "size": 100, "stencil": "7pt", "sparse_blas": {} } Matrix is of size (125, 125), 725 -Current state: -[ - { - "size": 100, - "stencil": "7pt", - "sparse_blas": { - "transpose": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "components": { - "allocate": 1.0, - "components::fill_array": 1.0, - "csr::transpose": 1.0, - "free": 1.0, - "overhead": 1.0 - }, - "completed": true - } - }, - "rows": 125, - "cols": 125, - "nonzeros": 725 - } -] + Running 
sparse_blas: transpose diff --git a/benchmark/test/reference/spmv.matrix.stderr b/benchmark/test/reference/spmv.matrix.stderr index 8d942cd0de5..a618da5b321 100644 --- a/benchmark/test/reference/spmv.matrix.stderr +++ b/benchmark/test/reference/spmv.matrix.stderr @@ -10,22 +10,5 @@ Running test case "filename": "", "spmv": {} } -Matrix is of size (36, 36) -Current state: -[ - { - "filename": "", - "spmv": { - "coo": { - "storage": 3328, - "max_relative_norm2": 1.0, - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "size": 36, - "nnz": 208, - "optimal": {} - } -] +Matrix is of size (36, 36), 208 + Running spmv: coo diff --git a/benchmark/test/reference/spmv.matrix.stdout b/benchmark/test/reference/spmv.matrix.stdout index 47035c27549..dc30ab6b284 100644 --- a/benchmark/test/reference/spmv.matrix.stdout +++ b/benchmark/test/reference/spmv.matrix.stdout @@ -11,8 +11,9 @@ "completed": true } }, - "size": 36, - "nnz": 208, + "rows": 36, + "cols": 36, + "nonzeros": 208, "optimal": { "spmv": "coo" } diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 961ac587990..09a10b725ea 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -11,7 +11,6 @@ Running test case "stencil": "7pt", "spmv": {} } -DEBUG: begin stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -52,13 +51,9 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -Matrix is of size (125, 125) -DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin dense::copy -DEBUG: end dense::copy -DEBUG: end copy() +Matrix is of size (125, 125), 725 +DEBUG: begin stencil(100,7pt) + Running spmv: coo DEBUG: begin coo DEBUG: begin allocate DEBUG: end allocate @@ -87,27 +82,8 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end coo -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "spmv": { - "coo": { - 
"storage": 11600, - "time": 1.0, - "repetitions": 1, - "completed": true - } - }, - "nnz": 725, - "optimal": {} - } -] -DEBUG: begin free -DEBUG: end free +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/spmv.profile.stdout b/benchmark/test/reference/spmv.profile.stdout index dacc490ddf0..5302d54f9f0 100644 --- a/benchmark/test/reference/spmv.profile.stdout +++ b/benchmark/test/reference/spmv.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "spmv": { "coo": { @@ -11,7 +11,9 @@ "completed": true } }, - "nnz": 725, + "rows": 125, + "cols": 125, + "nonzeros": 725, "optimal": { "spmv": "coo" } diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index dc9933b40ec..a910512ff31 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -11,22 +11,5 @@ Running test case "stencil": "7pt", "spmv": {} } -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "spmv": { - "coo": { - "storage": 11600, - "max_relative_norm2": 1.0, - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "nnz": 725, - "optimal": {} - } -] +Matrix is of size (125, 125), 725 + Running spmv: coo diff --git a/benchmark/test/reference/spmv.simple.stdout b/benchmark/test/reference/spmv.simple.stdout index 90f8903a452..737938d7c96 100644 --- a/benchmark/test/reference/spmv.simple.stdout +++ b/benchmark/test/reference/spmv.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "spmv": { "coo": { @@ -12,7 +12,9 @@ "completed": true } }, - "nnz": 725, + "rows": 125, + "cols": 125, + "nonzeros": 725, "optimal": { "spmv": "coo" } diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index b7ec0e72cf1..41acb560ba1 100644 --- a/benchmark/utils/general.hpp +++ 
b/benchmark/utils/general.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -53,10 +54,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include -#include -#include #include @@ -100,10 +97,6 @@ DEFINE_string( DEFINE_bool(detailed, true, "If set, performs several runs to obtain more detailed results"); -DEFINE_bool(keep_errors, true, - "If set, writes exception messages during the execution into the " - "JSON output"); - DEFINE_bool(nested_names, false, "If set, separately logs nested operations"); DEFINE_bool(profile, false, @@ -157,27 +150,32 @@ std::unique_ptr input_stream; * @param format the format of the benchmark input data */ void initialize_argument_parsing(int* argc, char** argv[], std::string& header, - std::string& format) + std::string& format, bool do_print = true) { - std::ostringstream doc; - doc << header << "Usage: " << (*argv)[0] << " [options]\n" - << format - << " The results are written on standard output, in the same " - "format,\n" - << " but with test cases extended to include an additional member " - "\n" - << " object for each benchmark run.\n" - << " If run with a --backup flag, an intermediate result is " - "written \n" - << " to a file in the same format. 
The backup file can be used as " - "\n" - << " input to this test suite, and the benchmarking will \n" - << " continue from the point where the backup file was created."; - - gflags::SetUsageMessage(doc.str()); - std::ostringstream ver; - ver << gko::version_info::get(); - gflags::SetVersionString(ver.str()); + if (do_print) { + std::ostringstream doc; + doc << header << "Usage: " << (*argv)[0] << " [options]\n" + << format + << " The results are written on standard output, in the same " + "format,\n" + << " but with test cases extended to include an additional member " + "\n" + << " object for each benchmark run.\n" + << " If run with a --backup flag, an intermediate result is " + "written \n" + << " to a file in the same format. The backup file can be used as " + "\n" + << " input to this test suite, and the benchmarking will \n" + << " continue from the point where the backup file was created."; + + gflags::SetUsageMessage(doc.str()); + std::ostringstream ver; + ver << gko::version_info::get(); + gflags::SetVersionString(ver.str()); + } else { + gflags::SetUsageMessage(""); + gflags::SetVersionString(""); + } gflags::ParseCommandLineFlags(argc, argv, true); if (FLAGS_profile) { FLAGS_repetitions = "1"; @@ -206,20 +204,19 @@ void print_general_information(const std::string& extra) { std::clog << gko::version_info::get() << std::endl << "Running on " << FLAGS_executor << "(" << FLAGS_device_id - << ")" << std::endl + << ")\n" << "Running with " << FLAGS_warmup << " warm iterations and "; if (FLAGS_repetitions == "auto") { std::clog << "adaptively determined repetititions with " << FLAGS_min_repetitions << " <= rep <= " << FLAGS_max_repetitions - << " and a minimal runtime of " << FLAGS_min_runtime << "s" - << std::endl; + << " and a minimal runtime of " << FLAGS_min_runtime << "s\n"; } else { - std::clog << FLAGS_repetitions << " running iterations" << std::endl; + std::clog << FLAGS_repetitions << " running iterations\n"; } std::clog << "The random seed for right 
hand sides is " << FLAGS_seed - << std::endl - << extra; + << '\n' + << extra << '\n'; } @@ -319,7 +316,7 @@ std::istream& get_input_stream() // backup generation -void backup_results(rapidjson::Document& results) +void backup_results(json& results) { static int next = 0; static auto filenames = []() -> std::array { @@ -576,279 +573,4 @@ gko::remove_complex compute_max_relative_norm2( } -/** - * A class for controlling the number warmup and timed iterations. - * - * The behavior is determined by the following flags - * - 'repetitions' switch between fixed and adaptive number of iterations - * - 'warmup' warmup iterations, applies in fixed and adaptive case - * - 'min_repetitions' minimal number of repetitions (adaptive case) - * - 'max_repetitions' maximal number of repetitions (adaptive case) - * - 'min_runtime' minimal total runtime (adaptive case) - * - 'repetition_growth_factor' controls the increase between two successive - * timings - * - * Usage: - * `IterationControl` exposes the member functions: - * - `warmup_run()`: controls run defined by `warmup` flag - * - `run(bool)`: controls run defined by all other flags - * - `get_timer()`: access to underlying timer - * The first two methods return an object that is to be used in a range-based - * for loop: - * ``` - * IterationControl ic(get_timer(...)); - * - * // warmup run always uses fixed number of iteration and does not issue - * // timings - * for(auto status: ic.warmup_run()){ - * // execute benchmark - * } - * // run may use adaptive number of iterations (depending on cmd line flag) - * // and issues timing (unless manage_timings is false) - * for(auto status: ic.run(manage_timings [default is true])){ - * if(! manage_timings) ic.get_timer->tic(); - * // execute benchmark - * if(! manage_timings) ic.get_timer->toc(); - * } - * - * ``` - * At the beginning of both methods, the timer is reset. 
- * The `status` object exposes the member - * - `cur_it`, containing the current iteration number, - * and the methods - * - `is_finished`, checks if the benchmark is finished, - */ -class IterationControl { - using IndexType = unsigned int; //!< to be compatible with GFLAGS type - - class run_control; - -public: - /** - * Creates an `IterationControl` object. - * - * Uses the commandline flags to setup the stopping criteria for the - * warmup and timed run. - * - * @param timer the timer that is to be used for the timings - */ - explicit IterationControl(const std::shared_ptr& timer) - { - status_warmup_ = {TimerManager{timer, false}, FLAGS_warmup, - FLAGS_warmup, 0., 0}; - if (FLAGS_repetitions == "auto") { - status_run_ = {TimerManager{timer, true}, FLAGS_min_repetitions, - FLAGS_max_repetitions, FLAGS_min_runtime}; - } else { - const auto reps = - static_cast(std::stoi(FLAGS_repetitions)); - status_run_ = {TimerManager{timer, true}, reps, reps, 0., 0}; - } - } - - IterationControl() = default; - IterationControl(const IterationControl&) = default; - IterationControl(IterationControl&&) = default; - - /** - * Creates iterable `run_control` object for the warmup run. - * - * This run uses always a fixed number of iterations. - */ - run_control warmup_run() - { - status_warmup_.cur_it = 0; - status_warmup_.managed_timer.clear(); - return run_control{&status_warmup_}; - } - - /** - * Creates iterable `run_control` object for the timed run. - * - * This run may be adaptive, depending on the commandline flags. 
- * - * @param manage_timings If true, the timer calls (`tic/toc`) are handled - * by the `run_control` object, otherwise they need to be executed outside - */ - run_control run(bool manage_timings = true) - { - status_run_.cur_it = 0; - status_run_.managed_timer.clear(); - status_run_.managed_timer.manage_timings = manage_timings; - return run_control{&status_run_}; - } - - std::shared_ptr get_timer() const - { - return status_run_.managed_timer.timer; - } - - /** - * Compute the time from the given statistical method - * - * @param method the statistical method. If the timer does not have the - * same iteration as the IterationControl, it can only use - * average from the IterationControl. - * - * @return the statistical time - */ - double compute_time(const std::string& method = "average") const - { - if (status_run_.managed_timer.timer->get_num_repetitions() == - this->get_num_repetitions()) { - return status_run_.managed_timer.compute_time(method); - } else { - assert(method == "average"); - return status_run_.managed_timer.get_total_time() / - this->get_num_repetitions(); - } - } - - IndexType get_num_repetitions() const { return status_run_.cur_it; } - -private: - struct TimerManager { - std::shared_ptr timer; - bool manage_timings = false; - - void tic() - { - if (manage_timings) { - timer->tic(); - } - } - void toc(unsigned int num = 1) - { - if (manage_timings) { - timer->toc(num); - } - } - - void clear() { timer->clear(); } - - double get_total_time() const { return timer->get_total_time(); } - - double compute_time(const std::string& method = "average") const - { - return timer->compute_time(method); - } - }; - - /** - * Stores stopping criteria of the adaptive benchmark run as well as the - * current iteration number. 
- */ - struct status { - TimerManager managed_timer{}; - - IndexType min_it = 0; - IndexType max_it = 0; - double max_runtime = 0.; - - IndexType cur_it = 0; - - /** - * checks if the adaptive run is complete - * - * the adaptive run is complete if: - * - the minimum number of iteration is reached - * - and either: - * - the maximum number of repetitions is reached - * - the total runtime is above the threshold - * - * @return completeness state of the adaptive run - */ - bool is_finished() const - { - return cur_it >= min_it && - (cur_it >= max_it || - managed_timer.get_total_time() >= max_runtime); - } - }; - - /** - * Iterable class managing the benchmark iteration. - * - * Has to be used in a range-based for loop. - */ - struct run_control { - struct iterator { - /** - * Increases the current iteration count and finishes timing if - * necessary. - * - * As `++it` is the last step of a for-loop, the managed_timer is - * stopped, if enough iterations have passed since the last timing. - * The interval between two timings is steadily increased to - * reduce the timing overhead. - */ - iterator operator++() - { - cur_info->cur_it++; - if (cur_info->cur_it >= next_timing && !stopped) { - cur_info->managed_timer.toc( - static_cast(cur_info->cur_it - start_timing)); - stopped = true; - next_timing = static_cast(std::ceil( - next_timing * FLAGS_repetition_growth_factor)); - // If repetition_growth_factor <= 1, next_timing will be - // next iteration. - if (next_timing <= cur_info->cur_it) { - next_timing = cur_info->cur_it + 1; - } - } - return *this; - } - - status operator*() const { return *cur_info; } - - /** - * Checks if the benchmark is finished and handles timing, if - * necessary. - * - * As `begin != end` is the first step in a for-loop, the - * managed_timer is started, if it was previously stopped. - * Additionally, if the benchmark is complete and the managed_timer - * is still running it is stopped. 
(This may occur if the maximal - * number of repetitions is surpassed) - * - * Uses only the information from the `status` object, i.e. - * the right hand side is ignored. - * - * @return true if benchmark is not finished, else false - */ - bool operator!=(const iterator&) - { - const bool is_finished = cur_info->is_finished(); - if (!is_finished && stopped) { - stopped = false; - cur_info->managed_timer.tic(); - start_timing = cur_info->cur_it; - } else if (is_finished && !stopped) { - cur_info->managed_timer.toc( - static_cast(cur_info->cur_it - start_timing)); - stopped = true; - } - return !is_finished; - } - - status* cur_info; - IndexType next_timing = 1; //!< next iteration to stop timing - IndexType start_timing = 0; //!< iteration for starting timing - bool stopped = true; - }; - - iterator begin() const { return iterator{info}; } - - // not used, could potentially be used in c++17 as a sentinel - iterator end() const { return iterator{}; } - - status* info; - }; - - status status_warmup_; - status status_run_; -}; - - #endif // GKO_BENCHMARK_UTILS_GENERAL_HPP_ diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp index 2049dadf45f..39d8b5a8107 100644 --- a/benchmark/utils/general_matrix.hpp +++ b/benchmark/utils/general_matrix.hpp @@ -57,9 +57,9 @@ DEFINE_string(input_matrix, "", */ void initialize_argument_parsing_matrix( int* argc, char** argv[], std::string& header, std::string& format, - std::string additional_matrix_file_json = "") + std::string additional_matrix_file_json = "", bool do_print = true) { - initialize_argument_parsing(argc, argv, header, format); + initialize_argument_parsing(argc, argv, header, format, do_print); std::string input_matrix_str{FLAGS_input_matrix}; if (!input_matrix_str.empty()) { if (input_stream) { @@ -67,17 +67,13 @@ void initialize_argument_parsing_matrix( << "-input and -input_matrix cannot be used simultaneously\n"; std::exit(1); } - // create JSON for the filename via RapidJSON to 
ensure the string is - // correctly escaped - rapidjson::Document d; + // create JSON for the filename via nlohmann_json to ensure the string + // is correctly escaped auto json_template = R"([{"filename":"")" + additional_matrix_file_json + "}]"; - d.Parse(json_template.c_str()); - d[0]["filename"].SetString(input_matrix_str.c_str(), d.GetAllocator()); - rapidjson::StringBuffer sb; - rapidjson::PrettyWriter writer(sb); - d.Accept(writer); - input_stream = std::make_unique(sb.GetString()); + auto doc = json::parse(json_template); + doc[0]["filename"] = input_matrix_str; + input_stream = std::make_unique(doc.dump()); } } diff --git a/benchmark/utils/generator.hpp b/benchmark/utils/generator.hpp index 076d2954980..257a2384634 100644 --- a/benchmark/utils/generator.hpp +++ b/benchmark/utils/generator.hpp @@ -53,28 +53,45 @@ struct DefaultSystemGenerator { using Vec = vec; static gko::matrix_data generate_matrix_data( - rapidjson::Value& config) + const json& config) { - if (config.HasMember("filename")) { - std::ifstream in(config["filename"].GetString()); + if (config.contains("filename")) { + std::ifstream in(config["filename"].get()); return gko::read_generic_raw(in); - } else if (config.HasMember("stencil")) { + } else if (config.contains("stencil")) { return generate_stencil( - config["stencil"].GetString(), config["size"].GetInt64()); + config["stencil"].get(), + config["size"].get()); } else { throw std::runtime_error( "No known way to generate matrix data found."); } } - static std::string describe_config(rapidjson::Value& config) + static std::string get_example_config() { - if (config.HasMember("filename")) { - return config["filename"].GetString(); - } else if (config.HasMember("stencil")) { + return json:: + parse(R"([{"filename": "my_file.mtx"},{"filename": "my_file2.mtx"},{"size": 100, "stencil": "7pt"}])") + .dump(4); + } + + static bool validate_config(const json& test_case) + { + return ((test_case.contains("size") && test_case.contains("stencil") && 
+ test_case["size"].is_number_integer() && + test_case["stencil"].is_string()) || + (test_case.contains("filename") && + test_case["filename"].is_string())); + } + + static std::string describe_config(const json& config) + { + if (config.contains("filename")) { + return config["filename"].get(); + } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].GetInt64() << "," - << config["stencil"].GetString() << ")"; + ss << "stencil(" << config["size"].get() << "," + << config["stencil"].get() << ")"; return ss.str(); } else { throw std::runtime_error("No known way to describe config."); @@ -82,30 +99,30 @@ struct DefaultSystemGenerator { } static std::shared_ptr generate_matrix_with_optimal_format( - std::shared_ptr exec, rapidjson::Value& config) + std::shared_ptr exec, json& config) { auto data = generate_matrix_data(config); return generate_matrix_with_format( - std::move(exec), config["optimal"]["spmv"].GetString(), data); + std::move(exec), config["optimal"]["spmv"].get(), + data); } static std::shared_ptr generate_matrix_with_format( std::shared_ptr exec, const std::string& format_name, const gko::matrix_data& data, - rapidjson::Value* spmv_case = nullptr, - rapidjson::MemoryPoolAllocator<>* allocator = nullptr) + json* spmv_case = nullptr) { auto storage_logger = std::make_shared(); - if (spmv_case && allocator) { + if (spmv_case) { exec->add_logger(storage_logger); } auto mtx = gko::share(::formats::matrix_factory(format_name, exec, data)); - if (spmv_case && allocator) { + if (spmv_case) { exec->remove_logger(storage_logger); - storage_logger->write_data(*spmv_case, *allocator); + storage_logger->write_data(*spmv_case); } return mtx; @@ -172,32 +189,51 @@ struct DistributedDefaultSystemGenerator { using Vec = dist_vec; gko::matrix_data generate_matrix_data( - rapidjson::Value& config) const + const json& config) const { - if (config.HasMember("filename")) { - std::ifstream in(config["filename"].GetString()); + if 
(config.contains("filename")) { + std::ifstream in(config["filename"].get()); return gko::read_generic_raw(in); - } else if (config.HasMember("stencil")) { + } else if (config.contains("stencil")) { auto local_size = static_cast( - config["size"].GetInt64() / comm.size()); + config["size"].get() / comm.size()); return generate_stencil( - config["stencil"].GetString(), comm, local_size, - config["comm_pattern"].GetString() == std::string("optimal")); + config["stencil"].get(), comm, local_size, + config["comm_pattern"].get() == + std::string("optimal")); } else { throw std::runtime_error( "No known way to generate matrix data found."); } } - std::string describe_config(rapidjson::Value& config) const + static std::string get_example_config() { - if (config.HasMember("filename")) { - return config["filename"].GetString(); - } else if (config.HasMember("stencil")) { + return json:: + parse(R"([{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}, {"filename": "my_file.mtx"}])") + .dump(4); + } + + static bool validate_config(const json& test_case) + { + return ((test_case.contains("size") && test_case.contains("stencil") && + test_case.contains("comm_pattern") && + test_case["size"].is_number_integer() && + test_case["stencil"].is_string() && + test_case["comm_pattern"].is_string()) || + (test_case.contains("filename") && + test_case["filename"].is_string())); + } + + static std::string describe_config(const json& config) + { + if (config.contains("filename")) { + return config["filename"].get(); + } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].GetInt64() << "," - << config["stencil"].GetString() << "," - << config["comm_pattern"].GetString() << ")"; + ss << "stencil(" << config["size"].get() << "," + << config["stencil"].get() << "," + << config["comm_pattern"].get() << ")"; return ss.str(); } else { throw std::runtime_error("No known way to describe config."); @@ -205,29 +241,33 @@ struct 
DistributedDefaultSystemGenerator { } std::shared_ptr generate_matrix_with_optimal_format( - std::shared_ptr exec, rapidjson::Value& config) const + std::shared_ptr exec, json& config) const { auto data = generate_matrix_data(config); return generate_matrix_with_format( - std::move(exec), config["optimal"]["spmv"].GetString(), data); + std::move(exec), config["optimal"]["spmv"].get(), + data); } std::shared_ptr generate_matrix_with_format( std::shared_ptr exec, const std::string& format_name, const gko::matrix_data& data, - rapidjson::Value* spmv_case = nullptr, - rapidjson::MemoryPoolAllocator<>* allocator = nullptr) const + json* spmv_case = nullptr) const { auto part = gko::experimental::distributed:: Partition::build_from_global_size_uniform( exec, comm.size(), static_cast(data.size[0])); auto formats = split(format_name, '-'); + if (formats.size() != 2) { + throw std::runtime_error{"Invalid distributed format specifier " + + format_name}; + } auto local_mat = formats::matrix_type_factory.at(formats[0])(exec); auto non_local_mat = formats::matrix_type_factory.at(formats[1])(exec); auto storage_logger = std::make_shared(); - if (spmv_case && allocator) { + if (spmv_case) { exec->add_logger(storage_logger); } @@ -235,9 +275,9 @@ struct DistributedDefaultSystemGenerator { exec, comm, local_mat, non_local_mat); dist_mat->read_distributed(data, part); - if (spmv_case && allocator) { + if (spmv_case) { exec->remove_logger(storage_logger); - storage_logger->write_data(comm, *spmv_case, *allocator); + storage_logger->write_data(comm, *spmv_case); } return dist_mat; diff --git a/benchmark/utils/iteration_control.hpp b/benchmark/utils/iteration_control.hpp new file mode 100644 index 00000000000..295ae7870d6 --- /dev/null +++ b/benchmark/utils/iteration_control.hpp @@ -0,0 +1,326 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_ITERATION_CONTROL_HPP_ +#define GKO_BENCHMARK_UTILS_ITERATION_CONTROL_HPP_ + + +#include + + +#include +#include +#include + + +#include "benchmark/utils/general.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" +#include "core/distributed/helpers.hpp" + + +/** + * A class for controlling the number of warmup and timed iterations.
+ * + * The behavior is determined by the following flags + * - 'repetitions' switch between fixed and adaptive number of iterations + * - 'warmup' warmup iterations, applies in fixed and adaptive case + * - 'min_repetitions' minimal number of repetitions (adaptive case) + * - 'max_repetitions' maximal number of repetitions (adaptive case) + * - 'min_runtime' minimal total runtime (adaptive case) + * - 'repetition_growth_factor' controls the increase between two successive + * timings + * + * Usage: + * `IterationControl` exposes the member functions: + * - `warmup_run()`: controls run defined by `warmup` flag + * - `run(bool)`: controls run defined by all other flags + * - `get_timer()`: access to underlying timer + * The first two methods return an object that is to be used in a range-based + * for loop: + * ``` + * IterationControl ic(get_timer(...)); + * + * // warmup run always uses a fixed number of iterations and does not issue + * // timings + * for(auto status: ic.warmup_run()){ + * // execute benchmark + * } + * // run may use adaptive number of iterations (depending on cmd line flag) + * // and issues timing (unless manage_timings is false) + * for(auto status: ic.run(manage_timings [default is true])){ + * if(! manage_timings) ic.get_timer()->tic(); + * // execute benchmark + * if(! manage_timings) ic.get_timer()->toc(); + * } + * + * ``` + * At the beginning of both methods, the timer is reset. + * The `status` object exposes the member + * - `cur_it`, containing the current iteration number, + * and the method + * - `is_finished()`, which checks if the benchmark is finished. + */ +class IterationControl { + using IndexType = unsigned int; //!< to be compatible with GFLAGS type + + class run_control; + +public: + /** + * Creates an `IterationControl` object. + * + * Uses the commandline flags to set up the stopping criteria for the + * warmup and timed run.
+ * + * @param timer the timer that is to be used for the timings + */ + explicit IterationControl(const std::shared_ptr& timer) + { + status_warmup_ = {TimerManager{timer, false}, FLAGS_warmup, + FLAGS_warmup, 0., 0}; + if (FLAGS_repetitions == "auto") { + status_run_ = {TimerManager{timer, true}, FLAGS_min_repetitions, + FLAGS_max_repetitions, FLAGS_min_runtime}; + } else { + const auto reps = + static_cast(std::stoi(FLAGS_repetitions)); + status_run_ = {TimerManager{timer, true}, reps, reps, 0., 0}; + } + } + + IterationControl() = default; + IterationControl(const IterationControl&) = default; + IterationControl(IterationControl&&) = default; + + /** + * Creates iterable `run_control` object for the warmup run. + * + * This run uses always a fixed number of iterations. + */ + run_control warmup_run() + { + status_warmup_.cur_it = 0; + status_warmup_.managed_timer.clear(); + return run_control{&status_warmup_}; + } + + /** + * Creates iterable `run_control` object for the timed run. + * + * This run may be adaptive, depending on the commandline flags. + * + * @param manage_timings If true, the timer calls (`tic/toc`) are handled + * by the `run_control` object, otherwise they need to be executed outside + */ + run_control run(bool manage_timings = true) + { + status_run_.cur_it = 0; + status_run_.managed_timer.clear(); + status_run_.managed_timer.manage_timings = manage_timings; + return run_control{&status_run_}; + } + + std::shared_ptr get_timer() const + { + return status_run_.managed_timer.timer; + } + + /** + * Compute the time from the given statistical method + * + * @param method the statistical method. If the timer does not have the + * same iteration as the IterationControl, it can only use + * average from the IterationControl. 
+ * + * @return the statistical time + */ + double compute_time(const std::string& method = "average") const + { + if (status_run_.managed_timer.timer->get_num_repetitions() == + this->get_num_repetitions()) { + return status_run_.managed_timer.compute_time(method); + } else { + assert(method == "average"); + return status_run_.managed_timer.get_total_time() / + this->get_num_repetitions(); + } + } + + IndexType get_num_repetitions() const { return status_run_.cur_it; } + +private: + struct TimerManager { + std::shared_ptr timer; + bool manage_timings = false; + + void tic() + { + if (manage_timings) { + timer->tic(); + } + } + void toc(unsigned int num = 1) + { + if (manage_timings) { + timer->toc(num); + } + } + + void clear() { timer->clear(); } + + double get_total_time() const { return timer->get_total_time(); } + + double compute_time(const std::string& method = "average") const + { + return timer->compute_time(method); + } + }; + + /** + * Stores stopping criteria of the adaptive benchmark run as well as the + * current iteration number. + */ + struct status { + TimerManager managed_timer{}; + + IndexType min_it = 0; + IndexType max_it = 0; + double max_runtime = 0.; + + IndexType cur_it = 0; + + /** + * checks if the adaptive run is complete + * + * the adaptive run is complete if: + * - the minimum number of iteration is reached + * - and either: + * - the maximum number of repetitions is reached + * - the total runtime is above the threshold + * + * @return completeness state of the adaptive run + */ + bool is_finished() const + { + return cur_it >= min_it && + (cur_it >= max_it || + managed_timer.get_total_time() >= max_runtime); + } + }; + + /** + * Iterable class managing the benchmark iteration. + * + * Has to be used in a range-based for loop. + */ + struct run_control { + struct iterator { + /** + * Increases the current iteration count and finishes timing if + * necessary. 
+ * + * As `++it` is the last step of a for-loop, the managed_timer is + * stopped, if enough iterations have passed since the last timing. + * The interval between two timings is steadily increased to + * reduce the timing overhead. + */ + iterator operator++() + { + cur_info->cur_it++; + if (cur_info->cur_it >= next_timing && !stopped) { + cur_info->managed_timer.toc( + static_cast(cur_info->cur_it - start_timing)); + stopped = true; + next_timing = static_cast(std::ceil( + next_timing * FLAGS_repetition_growth_factor)); + // If repetition_growth_factor <= 1, next_timing will be + // next iteration. + if (next_timing <= cur_info->cur_it) { + next_timing = cur_info->cur_it + 1; + } + } + return *this; + } + + status operator*() const { return *cur_info; } + + /** + * Checks if the benchmark is finished and handles timing, if + * necessary. + * + * As `begin != end` is the first step in a for-loop, the + * managed_timer is started, if it was previously stopped. + * Additionally, if the benchmark is complete and the managed_timer + * is still running it is stopped. (This may occur if the maximal + * number of repetitions is surpassed) + * + * Uses only the information from the `status` object, i.e. + * the right hand side is ignored. 
+ * + * @return true if benchmark is not finished, else false + */ + bool operator!=(const iterator&) + { + const bool is_finished = cur_info->is_finished(); + if (!is_finished && stopped) { + stopped = false; + cur_info->managed_timer.tic(); + start_timing = cur_info->cur_it; + } else if (is_finished && !stopped) { + cur_info->managed_timer.toc( + static_cast(cur_info->cur_it - start_timing)); + stopped = true; + } + return !is_finished; + } + + status* cur_info; + IndexType next_timing = 1; //!< next iteration to stop timing + IndexType start_timing = 0; //!< iteration for starting timing + bool stopped = true; + }; + + iterator begin() const { return iterator{info}; } + + // not used, could potentially be used in c++17 as a sentinel + iterator end() const { return iterator{}; } + + status* info; + }; + + status status_warmup_; + status status_run_; +}; + + +#endif // GKO_BENCHMARK_UTILS_ITERATION_CONTROL_HPP_ diff --git a/benchmark/utils/json.hpp b/benchmark/utils/json.hpp index b0cd384cae5..684db0229aa 100644 --- a/benchmark/utils/json.hpp +++ b/benchmark/utils/json.hpp @@ -34,69 +34,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_BENCHMARK_UTILS_JSON_HPP_ -#include +#include -#include - - -#include -#include -#include -#include - - -// helper for setting rapidjson object members -template -std::enable_if_t< - !std::is_same::type, gko::size_type>::value, void> -add_or_set_member(rapidjson::Value& object, NameType&& name, T&& value, - Allocator&& allocator) -{ - if (object.HasMember(name)) { - object[name] = std::forward(value); - } else { - auto n = rapidjson::Value(name, allocator); - object.AddMember(n, std::forward(value), allocator); - } -} - - -/** - @internal This is required to fix some MacOS problems (and possibly other - compilers). There is no explicit RapidJSON constructor for `std::size_t` so a - conversion to a known constructor is required to solve any ambiguity. 
See the - last comments of https://github.com/ginkgo-project/ginkgo/issues/270. - */ -template -std::enable_if_t< - std::is_same::type, gko::size_type>::value, void> -add_or_set_member(rapidjson::Value& object, NameType&& name, T&& value, - Allocator&& allocator) -{ - if (object.HasMember(name)) { - object[name] = - std::forward(static_cast(value)); - } else { - auto n = rapidjson::Value(name, allocator); - object.AddMember( - n, std::forward(static_cast(value)), - allocator); - } -} - - -// helper for writing out rapidjson Values -inline std::ostream& operator<<(std::ostream& os, const rapidjson::Value& value) -{ - rapidjson::OStreamWrapper jos(os); - rapidjson::PrettyWriter, - rapidjson::UTF8<>, rapidjson::CrtAllocator, - rapidjson::kWriteNanAndInfFlag> - writer(jos); - value.Accept(writer); - return os; -} +using json = nlohmann::ordered_json; #endif // GKO_BENCHMARK_UTILS_JSON_HPP_ diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp index e3e6228604e..1e651811f0f 100644 --- a/benchmark/utils/loggers.hpp +++ b/benchmark/utils/loggers.hpp @@ -50,10 +50,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
struct JsonSummaryWriter : gko::log::ProfilerHook::SummaryWriter, gko::log::ProfilerHook::NestedSummaryWriter { - JsonSummaryWriter(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& alloc, - gko::uint32 repetitions) - : object{&object}, alloc{&alloc}, repetitions{repetitions} + JsonSummaryWriter(json& object, gko::uint32 repetitions) + : object{&object}, repetitions{repetitions} {} void write( @@ -62,13 +60,11 @@ struct JsonSummaryWriter : gko::log::ProfilerHook::SummaryWriter, { for (const auto& entry : entries) { if (entry.name != "total") { - add_or_set_member(*object, entry.name.c_str(), - entry.exclusive.count() * 1e-9 / repetitions, - *alloc); + (*object)[entry.name] = + entry.exclusive.count() * 1e-9 / repetitions; } } - add_or_set_member(*object, "overhead", - overhead.count() * 1e-9 / repetitions, *alloc); + (*object)["overhead"] = overhead.count() * 1e-9 / repetitions; } void write_nested(const gko::log::ProfilerHook::nested_summary_entry& root, @@ -84,27 +80,24 @@ struct JsonSummaryWriter : gko::log::ProfilerHook::SummaryWriter, visit(visit, child, new_prefix); exclusive -= child.elapsed; } - add_or_set_member(*object, (prefix + node.name).c_str(), - exclusive.count() * 1e-9 / repetitions, *alloc); + (*object)[prefix + node.name] = + exclusive.count() * 1e-9 / repetitions; }; // we don't need to annotate the total for (const auto& child : root.children) { visit(visit, child, ""); } - add_or_set_member(*object, "overhead", - overhead.count() * 1e-9 / repetitions, *alloc); + (*object)["overhead"] = overhead.count() * 1e-9 / repetitions; } - rapidjson::Value* object; - rapidjson::MemoryPoolAllocator<>* alloc; + json* object; gko::uint32 repetitions; }; inline std::shared_ptr create_operations_logger( bool gpu_timer, bool nested, std::shared_ptr exec, - rapidjson::Value& object, rapidjson::MemoryPoolAllocator<>& alloc, - gko::uint32 repetitions) + json& object, gko::uint32 repetitions) { std::shared_ptr timer; if (gpu_timer) { @@ -114,12 +107,10 
@@ inline std::shared_ptr create_operations_logger( } if (nested) { return gko::log::ProfilerHook::create_nested_summary( - timer, - std::make_unique(object, alloc, repetitions)); + timer, std::make_unique(object, repetitions)); } else { return gko::log::ProfilerHook::create_summary( - timer, - std::make_unique(object, alloc, repetitions)); + timer, std::make_unique(object, repetitions)); } } @@ -140,21 +131,18 @@ struct StorageLogger : gko::log::Logger { storage[location] = 0; } - void write_data(rapidjson::Value& output, - rapidjson::MemoryPoolAllocator<>& allocator) + void write_data(json& output) { const std::lock_guard lock(mutex); gko::size_type total{}; for (const auto& e : storage) { total += e.second; } - add_or_set_member(output, "storage", total, allocator); + output["storage"] = total; } #if GINKGO_BUILD_MPI - void write_data(gko::experimental::mpi::communicator comm, - rapidjson::Value& output, - rapidjson::MemoryPoolAllocator<>& allocator) + void write_data(gko::experimental::mpi::communicator comm, json& output) { const std::lock_guard lock(mutex); gko::size_type total{}; @@ -166,7 +154,7 @@ struct StorageLogger : gko::log::Logger { ? 
static_cast(MPI_IN_PLACE) : &total, &total, 1, MPI_SUM, 0); - add_or_set_member(output, "storage", total, allocator); + output["storage"] = total; } #endif @@ -191,17 +179,16 @@ struct ResidualLogger : gko::log::Logger { const gko::array* status, bool all_stopped) const override { - timestamps.PushBack(std::chrono::duration( - std::chrono::steady_clock::now() - start) - .count(), - alloc); + timestamps.push_back(std::chrono::duration( + std::chrono::steady_clock::now() - start) + .count()); if (residual_norm) { - rec_res_norms.PushBack( - get_norm(gko::as>(residual_norm)), alloc); + rec_res_norms.push_back( + get_norm(gko::as>(residual_norm))); } else { gko::detail::vector_dispatch( residual, [&](const auto v_residual) { - rec_res_norms.PushBack(compute_norm2(v_residual), alloc); + rec_res_norms.push_back(compute_norm2(v_residual)); }); } if (solution) { @@ -209,32 +196,25 @@ struct ResidualLogger : gko::log::Logger { rc_vtype>(solution, [&](auto v_solution) { using concrete_type = std::remove_pointer_t>; - true_res_norms.PushBack( - compute_residual_norm(matrix, gko::as(b), - v_solution), - alloc); + true_res_norms.push_back(compute_residual_norm( + matrix, gko::as(b), v_solution)); }); } else { - true_res_norms.PushBack(-1.0, alloc); + true_res_norms.push_back(-1.0); } if (implicit_sq_residual_norm) { - implicit_res_norms.PushBack( - std::sqrt(get_norm( - gko::as>(implicit_sq_residual_norm))), - alloc); + implicit_res_norms.push_back(std::sqrt( + get_norm(gko::as>(implicit_sq_residual_norm)))); has_implicit_res_norm = true; } else { - implicit_res_norms.PushBack(-1.0, alloc); + implicit_res_norms.push_back(-1.0); } } ResidualLogger(gko::ptr_param matrix, - gko::ptr_param b, - rapidjson::Value& rec_res_norms, - rapidjson::Value& true_res_norms, - rapidjson::Value& implicit_res_norms, - rapidjson::Value& timestamps, - rapidjson::MemoryPoolAllocator<>& alloc) + gko::ptr_param b, json& rec_res_norms, + json& true_res_norms, json& implicit_res_norms, + json& 
timestamps) : gko::log::Logger(gko::log::Logger::iteration_complete_mask), matrix{matrix.get()}, b{b.get()}, @@ -243,8 +223,7 @@ struct ResidualLogger : gko::log::Logger { true_res_norms{true_res_norms}, has_implicit_res_norm{}, implicit_res_norms{implicit_res_norms}, - timestamps{timestamps}, - alloc{alloc} + timestamps{timestamps} {} bool has_implicit_res_norms() const { return has_implicit_res_norm; } @@ -253,12 +232,11 @@ struct ResidualLogger : gko::log::Logger { const gko::LinOp* matrix; const gko::LinOp* b; std::chrono::steady_clock::time_point start; - rapidjson::Value& rec_res_norms; - rapidjson::Value& true_res_norms; + json& rec_res_norms; + json& true_res_norms; mutable bool has_implicit_res_norm; - rapidjson::Value& implicit_res_norms; - rapidjson::Value& timestamps; - rapidjson::MemoryPoolAllocator<>& alloc; + json& implicit_res_norms; + json& timestamps; }; @@ -279,11 +257,7 @@ struct IterationLogger : gko::log::Logger { : gko::log::Logger(gko::log::Logger::iteration_complete_mask) {} - void write_data(rapidjson::Value& output, - rapidjson::MemoryPoolAllocator<>& allocator) - { - add_or_set_member(output, "iterations", this->num_iters, allocator); - } + void write_data(json& output) { output["iterations"] = this->num_iters; } private: mutable gko::size_type num_iters{0}; diff --git a/benchmark/utils/runner.hpp b/benchmark/utils/runner.hpp new file mode 100644 index 00000000000..3520f7299ee --- /dev/null +++ b/benchmark/utils/runner.hpp @@ -0,0 +1,209 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_RUNNER_HPP_ +#define GKO_BENCHMARK_UTILS_RUNNER_HPP_ + + +#include + + +#include +#include +#include + + +#include "benchmark/utils/general.hpp" + + +std::shared_ptr create_profiler_hook( + std::shared_ptr exec, bool do_print = true) +{ + using gko::log::ProfilerHook; + std::map()>> + hook_map{ + {"none", [] { return std::shared_ptr{}; }}, + {"auto", [&] { return ProfilerHook::create_for_executor(exec); }}, + {"nvtx", [] { return ProfilerHook::create_nvtx(); }}, + {"roctx", [] { return ProfilerHook::create_roctx(); }}, + {"tau", [] { return ProfilerHook::create_tau(); }}, + {"vtune", [] { return ProfilerHook::create_vtune(); }}, + {"debug", [do_print] { + return ProfilerHook::create_custom( + [do_print](const char* name, + gko::log::profile_event_category) { + if (do_print) { + std::clog << "DEBUG: begin " << name << '\n'; + } + }, + [do_print](const char* name, + gko::log::profile_event_category) { + if (do_print) { + std::clog << "DEBUG: end " << name << '\n'; + } + }); + }}}; + return hook_map.at(FLAGS_profiler_hook)(); +} + + +template +struct Benchmark { + /** The name to be used in the JSON output. */ + virtual const std::string& get_name() const = 0; + + /** The operations to loop over for each test case. */ + virtual const std::vector& get_operations() const = 0; + + /** Should we write logging output? */ + virtual bool should_print() const = 0; + + /** Example JSON input */ + virtual std::string get_example_config() const = 0; + + /** Is the input test case in the correct format? 
*/ + virtual bool validate_config(const json& value) const = 0; + + /** Textual representation of the test case for profiler annotation */ + virtual std::string describe_config(const json& test_case) const = 0; + + /** Sets up shared state and test case info */ + virtual State setup(std::shared_ptr exec, + json& test_case) const = 0; + + /** Runs a single operation of the benchmark */ + virtual void run(std::shared_ptr exec, + std::shared_ptr timer, State& state, + const std::string& operation, + json& operation_case) const = 0; + + /** Post-process test case info. */ + virtual void postprocess(json& test_case) const {} +}; + + +template +void run_test_cases(const Benchmark& benchmark, + std::shared_ptr exec, + std::shared_ptr timer, json& test_cases) +{ + if (!test_cases.is_array()) { + if (benchmark.should_print()) { + std::cerr + << "Input has to be a JSON array of benchmark configurations:\n" + << benchmark.get_example_config() << std::endl; + } + std::exit(1); + } + for (const auto& test_case : test_cases) { + if (!test_case.is_object() || !benchmark.validate_config(test_case)) { + if (benchmark.should_print()) { + std::cerr << "Invalid test case:\n" + << std::setw(4) << test_case << "\nInput format:\n" + << benchmark.get_example_config() << std::endl; + } + std::exit(2); + } + } + + auto profiler_hook = create_profiler_hook(exec, benchmark.should_print()); + if (profiler_hook) { + exec->add_logger(profiler_hook); + } + auto annotate = + [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + }; + + for (auto& test_case : test_cases) { + try { + // set up benchmark + if (!test_case.contains(benchmark.get_name())) { + test_case[benchmark.get_name()] = json::object(); + } + if (benchmark.should_print()) { + std::clog << "Running test case\n" + << std::setw(4) << test_case << std::endl; + } + auto test_case_state = benchmark.setup(exec, test_case); + auto 
test_case_str = benchmark.describe_config(test_case); + auto test_case_range = annotate(test_case_str.c_str()); + auto& benchmark_case = test_case[benchmark.get_name()]; + for (const auto& operation_name : benchmark.get_operations()) { + if (benchmark_case.contains(operation_name) && + !FLAGS_overwrite) { + continue; + } + benchmark_case[operation_name] = json::object(); + if (benchmark.should_print()) { + std::clog << "\tRunning " << benchmark.get_name() << ": " + << operation_name << std::endl; + } + auto& operation_case = benchmark_case[operation_name]; + try { + auto operation_range = annotate(operation_name.c_str()); + benchmark.run(exec, timer, test_case_state, operation_name, + operation_case); + operation_case["completed"] = true; + } catch (const std::exception& e) { + operation_case["completed"] = false; + operation_case["error_type"] = + gko::name_demangling::get_dynamic_type(e); + operation_case["error"] = e.what(); + std::cerr << "Error when processing test case\n" + << std::setw(4) << test_case << "\n" + << "what(): " << e.what() << std::endl; + } + + if (benchmark.should_print()) { + backup_results(test_cases); + } + } + benchmark.postprocess(test_case); + } catch (const std::exception& e) { + std::cerr << "Error setting up benchmark, what(): " << e.what() + << std::endl; + test_case["error_type"] = gko::name_demangling::get_dynamic_type(e); + test_case["error"] = e.what(); + } + } + + if (profiler_hook) { + exec->remove_logger(profiler_hook); + } +} + + +#endif // GKO_BENCHMARK_UTILS_RUNNER_HPP_ diff --git a/benchmark/utils/spmv_validation.hpp b/benchmark/utils/spmv_validation.hpp deleted file mode 100644 index 83ea2085ec2..00000000000 --- a/benchmark/utils/spmv_validation.hpp +++ /dev/null @@ -1,83 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#ifndef GKO_BENCHMARK_UTILS_SPMV_VALIDATION_HPP_ -#define GKO_BENCHMARK_UTILS_SPMV_VALIDATION_HPP_ - - -#include - - -#include -#include - - -#include - - -std::string example_config = R"( - [ - {"filename": "my_file.mtx"}, - {"filename": "my_file2.mtx"}, - {"size": 100, "stencil": "7pt"}, - ] -)"; - - -/** - * Function which outputs the input format for benchmarks similar to the spmv. 
- */ -[[noreturn]] void print_config_error_and_exit() -{ - std::cerr << "Input has to be a JSON array of matrix configurations:\n" - << example_config << std::endl; - std::exit(1); -} - - -/** - * Validates whether the input format is correct for spmv-like benchmarks. - * - * @param value the JSON value to test. - */ -void validate_option_object(const rapidjson::Value& value) -{ - if (!value.IsObject() || - !((value.HasMember("size") && value.HasMember("stencil") && - value["size"].IsInt64() && value["stencil"].IsString()) || - (value.HasMember("filename") && value["filename"].IsString()))) { - print_config_error_and_exit(); - } -} - - -#endif // GKO_BENCHMARK_UTILS_SPMV_VALIDATION_HPP_ diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index a54d4d506ee..828f95bc8ca 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -14,8 +14,8 @@ if(GINKGO_BUILD_BENCHMARKS) if (NOT gflags_FOUND) add_subdirectory(gflags) endif() - if (NOT RapidJSON_FOUND) - add_subdirectory(rapidjson) + if (NOT nlohmann_json_FOUND) + add_subdirectory(nlohmann_json) endif() endif() diff --git a/third_party/nlohmann_json/CMakeLists.txt b/third_party/nlohmann_json/CMakeLists.txt new file mode 100644 index 00000000000..77064c66c40 --- /dev/null +++ b/third_party/nlohmann_json/CMakeLists.txt @@ -0,0 +1,9 @@ +message(STATUS "Fetching external nlohmann_json") +include(FetchContent) +FetchContent_Declare( + nlohmann_json + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d +) +set(JSON_BuildTests OFF CACHE INTERNAL "") +FetchContent_MakeAvailable(nlohmann_json) diff --git a/third_party/rapidjson/CMakeLists.txt b/third_party/rapidjson/CMakeLists.txt deleted file mode 100644 index a96b90cb882..00000000000 --- a/third_party/rapidjson/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -message(STATUS "Fetching external RapidJSON") -include(FetchContent) -FetchContent_Declare( - rapidjson - GIT_REPOSITORY 
https://github.com/Tencent/rapidjson.git - GIT_TAG 27c3a8dc0e2c9218fe94986d249a12b5ed838f1d -) -FetchContent_GetProperties(rapidjson) -if(NOT rapidjson_POPULATED) - FetchContent_Populate(rapidjson) -endif() -set(RapidJSON_INCLUDE_DIR "${rapidjson_SOURCE_DIR}/include") -add_library(rapidjson INTERFACE) -set_target_properties(rapidjson PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${RapidJSON_INCLUDE_DIR}") From c1cee359b333d3b8438db4598c9afb79b89e3478 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 27 Jul 2023 23:45:43 +0200 Subject: [PATCH 02/13] add distributed tests again This reverts commit 0dab7626e920bfdf32a2285ff5741da1e36404cb. Additionally replaces the JSON test case output by their description --- benchmark/test/CMakeLists.txt | 4 +- benchmark/test/input.distributed_mtx.json | 7 + benchmark/test/multi_vector_distributed.py | 38 ++ benchmark/test/reference/blas.profile.stderr | 6 +- benchmark/test/reference/blas.simple.stderr | 6 +- .../test/reference/conversion.all.stderr | 7 +- .../test/reference/conversion.profile.stderr | 7 +- .../test/reference/conversion.simple.stderr | 7 +- .../distributed_solver.profile.stderr | 11 +- .../distributed_solver.simple.stderr | 11 +- .../reference/matrix_statistics.simple.stderr | 7 +- .../multi_vector_distributed.profile.stderr | 254 ++++++++++ .../multi_vector_distributed.profile.stdout | 29 ++ .../multi_vector_distributed.simple.stderr | 10 + .../multi_vector_distributed.simple.stdout | 29 ++ .../reference/preconditioner.profile.stderr | 7 +- .../reference/preconditioner.simple.stderr | 7 +- .../test/reference/solver.profile.stderr | 10 +- benchmark/test/reference/solver.simple.stderr | 10 +- .../test/reference/sparse_blas.profile.stderr | 7 +- .../test/reference/sparse_blas.simple.stderr | 7 +- benchmark/test/reference/spmv.profile.stderr | 7 +- benchmark/test/reference/spmv.simple.stderr | 7 +- .../reference/spmv_distributed.profile.stderr | 446 ++++++++++++++++++ .../reference/spmv_distributed.profile.stdout | 
22 + .../reference/spmv_distributed.simple.stderr | 10 + .../reference/spmv_distributed.simple.stdout | 23 + benchmark/test/spmv_distributed.py | 42 ++ benchmark/test/test_framework.py.in | 2 +- benchmark/utils/general.hpp | 39 -- benchmark/utils/runner.hpp | 10 +- 31 files changed, 935 insertions(+), 154 deletions(-) create mode 100644 benchmark/test/input.distributed_mtx.json create mode 100644 benchmark/test/multi_vector_distributed.py create mode 100644 benchmark/test/reference/multi_vector_distributed.profile.stderr create mode 100644 benchmark/test/reference/multi_vector_distributed.profile.stdout create mode 100644 benchmark/test/reference/multi_vector_distributed.simple.stderr create mode 100644 benchmark/test/reference/multi_vector_distributed.simple.stdout create mode 100644 benchmark/test/reference/spmv_distributed.profile.stderr create mode 100644 benchmark/test/reference/spmv_distributed.profile.stdout create mode 100644 benchmark/test/reference/spmv_distributed.simple.stderr create mode 100644 benchmark/test/reference/spmv_distributed.simple.stdout create mode 100644 benchmark/test/spmv_distributed.py diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt index e1aab6dd75d..1cd589927fa 100644 --- a/benchmark/test/CMakeLists.txt +++ b/benchmark/test/CMakeLists.txt @@ -22,5 +22,7 @@ add_benchmark_test(solver) add_benchmark_test(sparse_blas) add_benchmark_test(spmv) if (GINKGO_BUILD_MPI) + add_benchmark_test(multi_vector_distributed) + add_benchmark_test(spmv_distributed) add_benchmark_test(solver_distributed) -endif() +endif() \ No newline at end of file diff --git a/benchmark/test/input.distributed_mtx.json b/benchmark/test/input.distributed_mtx.json new file mode 100644 index 00000000000..aca115179e6 --- /dev/null +++ b/benchmark/test/input.distributed_mtx.json @@ -0,0 +1,7 @@ +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil" + } +] \ No newline at end of file diff --git 
a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py new file mode 100644 index 00000000000..1e0c4c8adf5 --- /dev/null +++ b/benchmark/test/multi_vector_distributed.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +import test_framework + +# check that all input modes work: +# parameter +test_framework.compare_output_distributed( + ["-input", '[{"n": 100}]'], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + num_procs=3, +) + +# stdin +test_framework.compare_output_distributed( + [], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) + +# file +test_framework.compare_output_distributed( + ["-input", str(test_framework.sourcepath / "input.blas.json")], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) + +# profiler annotations +test_framework.compare_output_distributed( + ["-input", '[{"n": 100}]', "-profile", "-profiler_hook", "debug"], + expected_stdout="multi_vector_distributed.profile.stdout", + expected_stderr="multi_vector_distributed.profile.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index b64f4321287..1313c85e462 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -4,11 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The operations are copy,axpy,scal -Running test case -{ - "n": 100, - "blas": {} -} +Running test case n = 100 DEBUG: begin n = 100 Running blas: copy DEBUG: begin copy diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr 
index f41b25c6ee1..966ed597166 100644 --- a/benchmark/test/reference/blas.simple.stderr +++ b/benchmark/test/reference/blas.simple.stderr @@ -4,11 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are copy,axpy,scal -Running test case -{ - "n": 100, - "blas": {} -} +Running test case n = 100 Running blas: copy Running blas: axpy Running blas: scal diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index 1d5df7477ba..77ff50a1b89 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr,ell,sellp,hybrid -Running test case -{ - "size": 100, - "stencil": "7pt", - "conversion": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 089e6be02f9..6078dd3db2f 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case -{ - "size": 100, - "stencil": "7pt", - "conversion": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 DEBUG: begin stencil(100,7pt) Running conversion: coo-read diff --git a/benchmark/test/reference/conversion.simple.stderr b/benchmark/test/reference/conversion.simple.stderr index a814dba6888..9b51effac09 100644 --- a/benchmark/test/reference/conversion.simple.stderr +++ b/benchmark/test/reference/conversion.simple.stderr @@ 
-4,12 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case -{ - "size": 100, - "stencil": "7pt", - "conversion": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index e583a1411a8..1daab773a38 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -5,16 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "comm_pattern": "stencil", - "optimal": { - "spmv": "csr-csr" - }, - "solver": {} -} +Running test case stencil(100,7pt,stencil) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size diff --git a/benchmark/test/reference/distributed_solver.simple.stderr b/benchmark/test/reference/distributed_solver.simple.stderr index 9feb7fa9522..607081a3949 100644 --- a/benchmark/test/reference/distributed_solver.simple.stderr +++ b/benchmark/test/reference/distributed_solver.simple.stderr @@ -5,15 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "comm_pattern": "stencil", - "optimal": { - "spmv": "csr-csr" - }, - "solver": {} -} +Running test case stencil(100,7pt,stencil) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr 
b/benchmark/test/reference/matrix_statistics.simple.stderr index 75a7cca709f..d02edbc44da 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -1,9 +1,4 @@ This is Ginkgo 1.7.0 (develop) running with core module 1.7.0 (develop) -Running test case -{ - "size": 100, - "stencil": "7pt", - "problem": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr new file mode 100644 index 00000000000..a77484daacb --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -0,0 +1,254 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scal +Running test case n = 100 +DEBUG: begin n = 100 + Running blas: copy +DEBUG: begin copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: 
begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end copy + Running blas: axpy +DEBUG: begin axpy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end 
components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin free +DEBUG: end free 
+DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end axpy + Running blas: scal +DEBUG: begin scal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::scale +DEBUG: end dense::scale +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end scal +DEBUG: end n = 100 diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stdout b/benchmark/test/reference/multi_vector_distributed.profile.stdout new file mode 100644 index 00000000000..3a2e7e54f80 --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.profile.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "axpy": { + "time": 1.0, 
+ "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stderr b/benchmark/test/reference/multi_vector_distributed.simple.stderr new file mode 100644 index 00000000000..966ed597166 --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.simple.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scal +Running test case n = 100 + Running blas: copy + Running blas: axpy + Running blas: scal diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stdout b/benchmark/test/reference/multi_vector_distributed.simple.stdout new file mode 100644 index 00000000000..08e692727fe --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.simple.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index c215b22c925..def3a83993d 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case -{ - "size": 100, - "stencil": 
"7pt", - "preconditioner": {} -} +Running test case stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr index 07d2cca6704..0090e180d2b 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -4,11 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case -{ - "size": 100, - "stencil": "7pt", - "preconditioner": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running preconditioner: none diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 0c3f7060796..43ff852f68e 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -5,15 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "optimal": { - "spmv": "csr" - }, - "solver": {} -} +Running test case stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array diff --git a/benchmark/test/reference/solver.simple.stderr b/benchmark/test/reference/solver.simple.stderr index c5e4267a6bd..659dd026588 100644 --- a/benchmark/test/reference/solver.simple.stderr +++ b/benchmark/test/reference/solver.simple.stderr @@ -5,14 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": 
"7pt", - "optimal": { - "spmv": "csr" - }, - "solver": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index d1434dad146..c47ce2a515b 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case -{ - "size": 100, - "stencil": "7pt", - "sparse_blas": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 DEBUG: begin allocate DEBUG: end allocate diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr index 452374a9268..1f2bb34809f 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -4,11 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case -{ - "size": 100, - "stencil": "7pt", - "sparse_blas": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running sparse_blas: transpose diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 09a10b725ea..4ff0125782f 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -5,12 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "spmv": {} -} +Running test case stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate diff --git 
a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index a910512ff31..9d5047febb6 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -5,11 +5,6 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "spmv": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running spmv: coo diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr new file mode 100644 index 00000000000..95a07c8275c --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -0,0 +1,446 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +Running test case stencil(100,7pt,stencil) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: 
begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin 
free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +Matrix is of size (81, 81), 144 +DEBUG: begin stencil(100,7pt,stencil) + Running spmv: csr-csr +DEBUG: begin csr-csr +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: 
end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end 
free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin 
free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin apply() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply() +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply() +DEBUG: begin advanced_apply() +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply() +DEBUG: end apply() +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end csr-csr +DEBUG: end stencil(100,7pt,stencil) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout new file mode 100644 index 00000000000..ebacddb887c --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -0,0 +1,22 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 6420, + "time": 1.0, + "repetitions": 1, + "completed": true + } + }, + "rows": 81, + "cols": 81, + "nonzeros": 144, + "optimal": { + "spmv": "csr-csr" + } + } +] diff --git 
a/benchmark/test/reference/spmv_distributed.simple.stderr b/benchmark/test/reference/spmv_distributed.simple.stderr new file mode 100644 index 00000000000..0df742d5b9b --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.simple.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +Running test case stencil(100,7pt,stencil) +Matrix is of size (81, 81), 144 + Running spmv: csr-csr diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout new file mode 100644 index 00000000000..64203476f91 --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -0,0 +1,23 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 6420, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "rows": 81, + "cols": 81, + "nonzeros": 144, + "optimal": { + "spmv": "csr-csr" + } + } +] diff --git a/benchmark/test/spmv_distributed.py b/benchmark/test/spmv_distributed.py new file mode 100644 index 00000000000..356db48459e --- /dev/null +++ b/benchmark/test/spmv_distributed.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +import test_framework + +# check that all input modes work: +# parameter +test_framework.compare_output_distributed( + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]'], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, +) + +# stdin +test_framework.compare_output_distributed( + [], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, + stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": 
"stencil"}]', +) + +# input file +test_framework.compare_output_distributed( + ["-input", str(test_framework.sourcepath / "input.distributed_mtx.json")], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, +) + +# profiler annotations +test_framework.compare_output_distributed( + [ + "-input", + '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', + "-profile", + "-profiler_hook", + "debug", + ], + expected_stdout="spmv_distributed.profile.stdout", + expected_stderr="spmv_distributed.profile.stderr", + num_procs=3, +) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index da1b0bfd618..faf898a21cb 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -247,7 +247,7 @@ def compare_output( def compare_output_distributed( args, expected_stdout, expected_stderr, num_procs, stdin="" ): - compare_output( + compare_output_impl( args, expected_stdout, expected_stderr, diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 41acb560ba1..1c48680f883 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -245,45 +245,6 @@ std::shared_ptr create_profiler_hook( } -struct owning_profiling_scope_guard { - std::string name; - gko::log::profiling_scope_guard guard; - - owning_profiling_scope_guard() = default; - - owning_profiling_scope_guard(std::string name_, - gko::log::ProfilerHook* profiler_hook) - : name(std::move(name_)), guard{profiler_hook->user_range(name.c_str())} - {} -}; - - -struct annotate_functor { - owning_profiling_scope_guard operator()(std::string name) const - { - if (profiler_hook) { - return owning_profiling_scope_guard{std::move(name), - profiler_hook.get()}; - } - return {}; - } - - gko::log::profiling_scope_guard operator()(const char* name) const - { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - } - - 
annotate_functor(std::shared_ptr profiler_hook) - : profiler_hook{std::move(profiler_hook)} - {} - - std::shared_ptr profiler_hook; -}; - - // Returns a random number engine std::default_random_engine& get_engine() { diff --git a/benchmark/utils/runner.hpp b/benchmark/utils/runner.hpp index 3520f7299ee..661c403706f 100644 --- a/benchmark/utils/runner.hpp +++ b/benchmark/utils/runner.hpp @@ -153,13 +153,13 @@ void run_test_cases(const Benchmark& benchmark, if (!test_case.contains(benchmark.get_name())) { test_case[benchmark.get_name()] = json::object(); } + auto test_case_desc = benchmark.describe_config(test_case); if (benchmark.should_print()) { - std::clog << "Running test case\n" - << std::setw(4) << test_case << std::endl; + std::clog << "Running test case " << test_case_desc + << std::endl; } auto test_case_state = benchmark.setup(exec, test_case); - auto test_case_str = benchmark.describe_config(test_case); - auto test_case_range = annotate(test_case_str.c_str()); + auto test_case_range = annotate(test_case_desc.c_str()); auto& benchmark_case = test_case[benchmark.get_name()]; for (const auto& operation_name : benchmark.get_operations()) { if (benchmark_case.contains(operation_name) && @@ -183,7 +183,7 @@ void run_test_cases(const Benchmark& benchmark, gko::name_demangling::get_dynamic_type(e); operation_case["error"] = e.what(); std::cerr << "Error when processing test case\n" - << std::setw(4) << test_case << "\n" + << test_case_desc << "\n" << "what(): " << e.what() << std::endl; } From ca3fccf3e9a03d63e3e2032556e8edd1543cea67 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 27 Jul 2023 23:44:32 +0200 Subject: [PATCH 03/13] handle JSON and non-JSON test output separately --- benchmark/test/reference/blas.profile.stdout | 3 +- benchmark/test/reference/blas.simple.stdout | 3 +- .../test/reference/conversion.all.stdout | 3 +- .../test/reference/conversion.profile.stdout | 3 +- .../test/reference/conversion.simple.stdout | 3 +- 
.../distributed_solver.profile.stdout | 3 +- .../distributed_solver.simple.stdout | 3 +- .../reference/matrix_statistics.simple.stdout | 3 +- .../multi_vector_distributed.profile.stdout | 3 +- .../multi_vector_distributed.simple.stdout | 3 +- .../reference/preconditioner.profile.stdout | 3 +- .../reference/preconditioner.simple.stdout | 3 +- .../test/reference/solver.profile.stdout | 3 +- benchmark/test/reference/solver.simple.stdout | 3 +- .../test/reference/sparse_blas.profile.stdout | 3 +- .../test/reference/sparse_blas.simple.stdout | 3 +- benchmark/test/reference/spmv.profile.stdout | 3 +- benchmark/test/reference/spmv.simple.stdout | 3 +- .../reference/spmv_distributed.profile.stdout | 3 +- .../reference/spmv_distributed.simple.stdout | 3 +- benchmark/test/test_framework.py.in | 78 ++++++------------- 21 files changed, 44 insertions(+), 94 deletions(-) diff --git a/benchmark/test/reference/blas.profile.stdout b/benchmark/test/reference/blas.profile.stdout index 3a2e7e54f80..8998d5eaed7 100644 --- a/benchmark/test/reference/blas.profile.stdout +++ b/benchmark/test/reference/blas.profile.stdout @@ -1,4 +1,3 @@ - [ { "n": 100, @@ -26,4 +25,4 @@ } } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/blas.simple.stdout b/benchmark/test/reference/blas.simple.stdout index 08e692727fe..a586a9bc57b 100644 --- a/benchmark/test/reference/blas.simple.stdout +++ b/benchmark/test/reference/blas.simple.stdout @@ -1,4 +1,3 @@ - [ { "n": 100, @@ -26,4 +25,4 @@ } } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/conversion.all.stdout b/benchmark/test/reference/conversion.all.stdout index c4b657a42c4..0c77d464793 100644 --- a/benchmark/test/reference/conversion.all.stdout +++ b/benchmark/test/reference/conversion.all.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -74,4 +73,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/conversion.profile.stdout 
b/benchmark/test/reference/conversion.profile.stdout index b29815f6c17..a9c3ea674fa 100644 --- a/benchmark/test/reference/conversion.profile.stdout +++ b/benchmark/test/reference/conversion.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -29,4 +28,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/conversion.simple.stdout b/benchmark/test/reference/conversion.simple.stdout index 856f1330eea..81c735789d1 100644 --- a/benchmark/test/reference/conversion.simple.stdout +++ b/benchmark/test/reference/conversion.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -29,4 +28,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout index aef92652256..55dfb1dc428 100644 --- a/benchmark/test/reference/distributed_solver.profile.stdout +++ b/benchmark/test/reference/distributed_solver.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -31,4 +30,4 @@ "rows": 125, "cols": 125 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/distributed_solver.simple.stdout b/benchmark/test/reference/distributed_solver.simple.stdout index 002b9d91347..eed8d864388 100644 --- a/benchmark/test/reference/distributed_solver.simple.stdout +++ b/benchmark/test/reference/distributed_solver.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -57,4 +56,4 @@ "rows": 125, "cols": 125 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/matrix_statistics.simple.stdout b/benchmark/test/reference/matrix_statistics.simple.stdout index 13746ce8a46..923bbc9f962 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stdout +++ b/benchmark/test/reference/matrix_statistics.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -38,4 +37,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git 
a/benchmark/test/reference/multi_vector_distributed.profile.stdout b/benchmark/test/reference/multi_vector_distributed.profile.stdout index 3a2e7e54f80..8998d5eaed7 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stdout +++ b/benchmark/test/reference/multi_vector_distributed.profile.stdout @@ -1,4 +1,3 @@ - [ { "n": 100, @@ -26,4 +25,4 @@ } } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stdout b/benchmark/test/reference/multi_vector_distributed.simple.stdout index 08e692727fe..a586a9bc57b 100644 --- a/benchmark/test/reference/multi_vector_distributed.simple.stdout +++ b/benchmark/test/reference/multi_vector_distributed.simple.stdout @@ -1,4 +1,3 @@ - [ { "n": 100, @@ -26,4 +25,4 @@ } } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout index f53407d818d..e33a6502eea 100644 --- a/benchmark/test/reference/preconditioner.profile.stdout +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -22,4 +21,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/preconditioner.simple.stdout b/benchmark/test/reference/preconditioner.simple.stdout index 92bb51ddb57..06291228a1c 100644 --- a/benchmark/test/reference/preconditioner.simple.stdout +++ b/benchmark/test/reference/preconditioner.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -30,4 +29,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout index 0148e6ef092..906c74de5e7 100644 --- a/benchmark/test/reference/solver.profile.stdout +++ b/benchmark/test/reference/solver.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -30,4 +29,4 @@ "rows": 125, "cols": 125 } -] +] \ No newline at end of file diff --git 
a/benchmark/test/reference/solver.simple.stdout b/benchmark/test/reference/solver.simple.stdout index b4e7b56b2bf..5d127fe4b78 100644 --- a/benchmark/test/reference/solver.simple.stdout +++ b/benchmark/test/reference/solver.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -54,4 +53,4 @@ "rows": 125, "cols": 125 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/sparse_blas.profile.stdout b/benchmark/test/reference/sparse_blas.profile.stdout index 848fb503ed4..e9d48fde23d 100644 --- a/benchmark/test/reference/sparse_blas.profile.stdout +++ b/benchmark/test/reference/sparse_blas.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -16,4 +15,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/sparse_blas.simple.stdout b/benchmark/test/reference/sparse_blas.simple.stdout index f39300ca35b..3cc5f774ebf 100644 --- a/benchmark/test/reference/sparse_blas.simple.stdout +++ b/benchmark/test/reference/sparse_blas.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -23,4 +22,4 @@ "cols": 125, "nonzeros": 725 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv.profile.stdout b/benchmark/test/reference/spmv.profile.stdout index 5302d54f9f0..409a92d4e33 100644 --- a/benchmark/test/reference/spmv.profile.stdout +++ b/benchmark/test/reference/spmv.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -18,4 +17,4 @@ "spmv": "coo" } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv.simple.stdout b/benchmark/test/reference/spmv.simple.stdout index 737938d7c96..9601a15b331 100644 --- a/benchmark/test/reference/spmv.simple.stdout +++ b/benchmark/test/reference/spmv.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -19,4 +18,4 @@ "spmv": "coo" } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout index ebacddb887c..8de6a68ae8a 100644 --- 
a/benchmark/test/reference/spmv_distributed.profile.stdout +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -19,4 +18,4 @@ "spmv": "csr-csr" } } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout index 64203476f91..f94e4b992a1 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stdout +++ b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -1,4 +1,3 @@ - [ { "size": 100, @@ -20,4 +19,4 @@ "spmv": "csr-csr" } } -] +] \ No newline at end of file diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index faf898a21cb..3deb282297a 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -22,7 +22,8 @@ denumberify_paths = [ "rhs_norm", "max_relative_norm2", ] -empty_string_paths = ["error", "filename"] +detypenameify_key_starts = ["generate(", "apply(", "advanced_apply(", "copy(", "check("] +empty_string_paths = ["filename"] empty_array_paths = [ "recurrent_residuals", "true_residuals", @@ -31,6 +32,18 @@ empty_array_paths = [ ] +def sanitize_json_key(key: str): + """Applies sanitation to a single key. + + Strings that start with a name in detypenameify_key_starts will be truncated + """ + + for start in detypenameify_key_starts: + if key.startswith(start): + return start + ")" + return key + + def sanitize_json_key_value(key: str, value, sanitize_all: bool): """Applies sanitation to a single key-value pair. 
@@ -59,7 +72,7 @@ def sanitize_json(parsed_input, sanitize_all: bool = False): if isinstance(parsed_input, dict): return { - key: sanitize_json_key_value(key, value, sanitize_all) + sanitize_json_key(key): sanitize_json_key_value(key, value, sanitize_all) for key, value in parsed_input.items() } elif isinstance(parsed_input, list): @@ -70,40 +83,15 @@ def sanitize_json(parsed_input, sanitize_all: bool = False): return parsed_input -def sanitize_json_in_text(lines: List[str]) -> List[str]: - """Sanitizes all occurrences of JSON content inside text input. +def determinize_json_text(input: str) -> List[str]: + """Sanitizes the given input JSON string. - Takes a list of text lines and detects any pretty-printed JSON output inside - (recognized by a single [, {, } or ] in an otherwise empty line). - The JSON output will be parsed and sanitized through sanitize_json(...) + The JSON values will be parsed and sanitized through sanitize_json(...) and pretty-printed to replace the original JSON input. - The function returns the resulting output. 
""" - json_begins = [i for i, l in enumerate(lines) if l in ["[", "{"]] - json_ends = [i + 1 for i, l in enumerate(lines) if l in ["]", "}"]] - json_pairs = list(zip(json_begins, json_ends)) - if len(json_pairs) == 0: - return lines - assert all(begin < end for begin, end in json_pairs) - nonjson_pairs = ( - [(0, json_begins[0])] - + list(zip(json_ends[:-1], json_begins[1:])) - + [(json_ends[-1], len(lines))] - ) - combined_pairs = sorted( - [(begin, end, False) for begin, end in nonjson_pairs] - + [(begin, end, True) for begin, end in json_pairs] - ) - texts = [ - ("\n".join(lines[begin:end]), do_sanitize) - for begin, end, do_sanitize in combined_pairs - ] - reconstructed = [ - json.dumps(sanitize_json(json.loads(t)), indent=4) if do_sanitize else t - for t, do_sanitize in texts - ] - return "\n".join(reconstructed).split("\n") + result = json.dumps(sanitize_json(json.loads(input)), indent=4) + return result.splitlines() def determinize_text( @@ -116,9 +104,6 @@ def determinize_text( Every input line matching an entry from ignore_patterns will be removed. Every line matching the first string in an entry from replace_patterns will be replaced by the second string. - Finally, the text will be passed to sanitize_json_in_text, which removes - nondeterministic parts from JSON objects/arrays in the input, - if it can be parsed correctly. The output is guaranteed to end with an empty line. 
""" @@ -137,10 +122,7 @@ def determinize_text( output_lines.append(line) if len(output_lines) == 0 or output_lines[-1] != "": output_lines.append("") - try: - return sanitize_json_in_text(output_lines) - except json.decoder.JSONDecodeError: - return output_lines + return output_lines def compare_output_impl( @@ -173,13 +155,7 @@ def compare_output_impl( ] if generate: open(expected_stdout, "w").write( - "\n".join( - determinize_text( - result.stdout.decode(), - ignore_patterns=[], - replace_patterns=typename_patterns, - ) - ) + "\n".join(determinize_json_text(result.stdout.decode())) ) open(expected_stderr, "w").write( "\n".join( @@ -192,19 +168,13 @@ def compare_output_impl( ) print("GENERATED") return - result_stdout_processed = determinize_text( - result.stdout.decode(), ignore_patterns=[], replace_patterns=typename_patterns - ) + result_stdout_processed = determinize_json_text(result.stdout.decode()) result_stderr_processed = determinize_text( result.stderr.decode(), ignore_patterns=version_patterns, replace_patterns=typename_patterns, ) - expected_stdout_processed = determinize_text( - open(expected_stdout).read(), - ignore_patterns=[], - replace_patterns=typename_patterns, - ) + expected_stdout_processed = determinize_json_text(open(expected_stdout).read()) expected_stderr_processed = determinize_text( open(expected_stderr).read(), ignore_patterns=version_patterns, From d27b507474d04567a36adbdf9a103d9bdabaf5c9 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Fri, 28 Jul 2023 00:15:00 +0200 Subject: [PATCH 04/13] benchmark reads on device_matrix_data --- benchmark/conversion/conversion.cpp | 11 +++-- .../test/reference/conversion.profile.stderr | 46 +++++++++++++++---- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp index b9a5d5c46d6..5f03cb2b933 100644 --- a/benchmark/conversion/conversion.cpp +++ b/benchmark/conversion/conversion.cpp @@ -60,7 +60,7 @@ OF THIS 
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. using Generator = DefaultSystemGenerator<>; -struct ConversionBenchmark : Benchmark> { +struct ConversionBenchmark : Benchmark> { std::string name; std::vector operations; @@ -112,8 +112,8 @@ struct ConversionBenchmark : Benchmark> { return Generator::describe_config(test_case); } - gko::matrix_data setup(std::shared_ptr exec, - json& test_case) const override + gko::device_matrix_data setup( + std::shared_ptr exec, json& test_case) const override { gko::matrix_data data; data = Generator::generate_matrix_data(test_case); @@ -122,12 +122,13 @@ struct ConversionBenchmark : Benchmark> { test_case["rows"] = data.size[0]; test_case["cols"] = data.size[1]; test_case["nonzeros"] = data.nonzeros.size(); - return data; + return gko::device_matrix_data::create_from_host(exec, + data); } void run(std::shared_ptr exec, std::shared_ptr timer, - gko::matrix_data& data, + gko::device_matrix_data& data, const std::string& operation_name, json& operation_case) const override { diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 6078dd3db2f..ca80375c5bf 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -6,17 +6,29 @@ The random seed for right hand sides is 42 The formats are coo,csr Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa DEBUG: begin stencil(100,7pt) Running conversion: coo-read DEBUG: begin coo-read DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end 
components::aos_to_soa +DEBUG: begin copy +DEBUG: end copy DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -28,12 +40,16 @@ DEBUG: end coo-read DEBUG: begin coo-csr DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array @@ -75,12 +91,16 @@ DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free @@ -104,12 +124,16 @@ DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate -DEBUG: begin components::aos_to_soa -DEBUG: end components::aos_to_soa +DEBUG: begin copy +DEBUG: end copy DEBUG: begin allocate DEBUG: end allocate DEBUG: begin free @@ -146,3 +170,9 @@ DEBUG: begin free DEBUG: end free DEBUG: end csr-coo DEBUG: end stencil(100,7pt) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free From e3af0296829eedb9072b8c2d75bc129123837ece Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 17 Aug 2023 21:17:05 +0200 Subject: [PATCH 05/13] remove allocations from output they are sometimes implementation-dependent for libstdc++ types --- benchmark/test/reference/blas.profile.stderr | 28 -- 
.../test/reference/conversion.profile.stderr | 104 ------ .../distributed_solver.profile.stderr | 232 ------------- .../multi_vector_distributed.profile.stderr | 128 -------- .../reference/preconditioner.profile.stderr | 44 --- .../test/reference/solver.profile.stderr | 132 -------- .../test/reference/sparse_blas.profile.stderr | 36 -- benchmark/test/reference/spmv.profile.stderr | 48 --- .../reference/spmv_distributed.profile.stderr | 308 ------------------ benchmark/test/test_framework.py.in | 11 +- 10 files changed, 6 insertions(+), 1065 deletions(-) diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index 1313c85e462..529fc16009c 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -8,27 +8,13 @@ Running test case n = 100 DEBUG: begin n = 100 Running blas: copy DEBUG: begin copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end copy Running blas: axpy DEBUG: begin axpy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill @@ -37,28 +23,14 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::add_scaled DEBUG: end dense::add_scaled -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end axpy Running blas: scal DEBUG: begin scal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::scale DEBUG: end dense::scale -DEBUG: begin free -DEBUG: end free -DEBUG: begin free 
-DEBUG: end free DEBUG: end scal DEBUG: end n = 100 diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index ca80375c5bf..a233579c721 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -6,173 +6,69 @@ The random seed for right hand sides is 42 The formats are coo,csr Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin stencil(100,7pt) Running conversion: coo-read DEBUG: begin coo-read -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end coo-read Running conversion: coo-csr DEBUG: begin coo-csr -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: end copy() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: 
begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end coo-csr Running conversion: csr-read DEBUG: begin csr-read -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end csr-read Running conversion: csr-coo DEBUG: begin csr-coo -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::convert_ptrs_to_idxs DEBUG: end components::convert_ptrs_to_idxs DEBUG: end copy() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free 
DEBUG: end csr-coo DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index 1daab773a38..4ea20730117 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -6,18 +6,8 @@ The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 Running test case stencil(100,7pt,stencil) -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -32,18 +22,10 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() @@ -56,8 +38,6 @@ DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() @@ -70,135 +50,29 @@ DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: 
begin dense::fill DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin distributed_matrix::build_local_nonlocal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: end distributed_matrix::build_local_nonlocal DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: 
begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -206,62 +80,30 @@ Matrix is of size (125, 125) DEBUG: begin stencil(100,7pt,stencil) Running solver: cg DEBUG: begin cg -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() -DEBUG: begin free -DEBUG: end free DEBUG: begin generate() DEBUG: begin generate() DEBUG: end generate() DEBUG: end generate() DEBUG: begin apply() DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate 
-DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin cg::initialize DEBUG: end cg::initialize DEBUG: begin advanced_apply() @@ -276,20 +118,10 @@ DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin apply() DEBUG: begin copy() DEBUG: begin dense::copy @@ -586,25 +418,9 @@ DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm DEBUG: end check() DEBUG: end check() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end iteration DEBUG: end apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -620,59 +436,11 @@ DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 DEBUG: end dense::compute_squared_norm2 DEBUG: begin dense::compute_sqrt DEBUG: end dense::compute_sqrt DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: 
begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end cg DEBUG: end stencil(100,7pt,stencil) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr index a77484daacb..102330e38f4 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stderr +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -8,18 +8,8 @@ Running test case n = 100 DEBUG: begin n = 100 Running blas: copy DEBUG: begin copy -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -34,32 +24,10 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: 
begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -74,45 +42,17 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::copy DEBUG: end dense::copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end copy Running blas: axpy DEBUG: begin axpy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -127,32 +67,10 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin 
partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -167,20 +85,8 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill @@ -189,29 +95,11 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::add_scaled DEBUG: end dense::add_scaled -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end axpy Running blas: scal DEBUG: begin scal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -226,29 +114,13 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: 
begin free -DEBUG: end free DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::scale DEBUG: end dense::scale -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end scal DEBUG: end n = 100 diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index def3a83993d..610dfe464ec 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -5,50 +5,20 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running with preconditioners: none Running test case stencil(100,7pt) -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data @@ -58,8 +28,6 @@ DEBUG: begin 
stencil(100,7pt) Running preconditioner: none DEBUG: begin none DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -71,17 +39,5 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: end apply() -DEBUG: begin free -DEBUG: end free DEBUG: end none DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 43ff852f68e..238591eb0c9 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -6,49 +6,19 @@ The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 Running test case stencil(100,7pt) -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free 
-DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -56,78 +26,36 @@ Matrix is of size (125, 125) DEBUG: begin stencil(100,7pt) Running solver: cg DEBUG: begin cg -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() -DEBUG: begin free -DEBUG: end free DEBUG: begin generate() DEBUG: begin generate() DEBUG: end generate() DEBUG: end generate() DEBUG: begin apply() DEBUG: begin iteration -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin cg::initialize DEBUG: end cg::initialize DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin apply() DEBUG: begin copy() DEBUG: 
begin dense::copy @@ -352,25 +280,9 @@ DEBUG: begin residual_norm::residual_norm DEBUG: end residual_norm::residual_norm DEBUG: end check() DEBUG: end check() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end iteration DEBUG: end apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -378,53 +290,9 @@ DEBUG: begin advanced_apply() DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch DEBUG: end dense::compute_norm2_dispatch DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end cg DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index c47ce2a515b..60cf41ccbae 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -6,54 +6,18 @@ The random 
seed for right hand sides is 42 The operations are transpose Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free DEBUG: begin stencil(100,7pt) Running sparse_blas: transpose DEBUG: begin transpose -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin csr::transpose DEBUG: end csr::transpose -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end transpose DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 4ff0125782f..2299614c6c4 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -6,61 +6,25 @@ The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 Running test case stencil(100,7pt) -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin 
dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free Matrix is of size (125, 125), 725 DEBUG: begin stencil(100,7pt) Running spmv: coo DEBUG: begin coo -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() @@ -68,17 +32,5 @@ DEBUG: begin apply() DEBUG: begin coo::spmv DEBUG: end coo::spmv DEBUG: end apply() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end coo DEBUG: end stencil(100,7pt) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr index 95a07c8275c..b44cef7f3f6 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -6,18 +6,8 @@ The random seed for right hand sides is 42 The formats are [csr]x[csr] The number of right hand sides is 1 Running test case stencil(100,7pt,stencil) -DEBUG: begin allocate -DEBUG: end allocate 
DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -32,50 +22,16 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -90,54 +46,20 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end 
allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free Matrix is of size (81, 81), 144 DEBUG: begin stencil(100,7pt,stencil) Running spmv: csr-csr DEBUG: begin csr-csr -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::fill_array @@ -152,18 +74,10 @@ DEBUG: begin partition::build_starting_indices DEBUG: end partition::build_starting_indices DEBUG: begin copy DEBUG: end copy -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() @@ -176,8 +90,6 @@ DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin copy() @@ -190,219 +102,27 @@ DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: end copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::fill DEBUG: end dense::fill 
-DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin distributed_matrix::build_local_nonlocal -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate 
-DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end distributed_matrix::build_local_nonlocal DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin free -DEBUG: end free DEBUG: begin copy DEBUG: end copy -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free 
-DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: begin copy() -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: begin apply() -DEBUG: begin allocate -DEBUG: end allocate -DEBUG: begin allocate -DEBUG: end allocate DEBUG: begin dense::row_gather DEBUG: end dense::row_gather DEBUG: begin apply() @@ -414,33 +134,5 @@ DEBUG: begin csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() DEBUG: end apply() -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free DEBUG: end csr-csr DEBUG: end stencil(100,7pt,stencil) -DEBUG: begin free -DEBUG: end free -DEBUG: begin free -DEBUG: end free diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 3deb282297a..014d3cb41a5 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -146,8 +146,9 @@ def compare_output_impl( " ".join(["'{}'".format(arg) for arg in launcher_flags + args]) ) ) - version_patterns = [ - " the .* module is", + ignore_patterns = [ + " the .* module is", # version numbers + "DEBUG: (begin|end ) (allocate|free)", # allocations ] typename_patterns = [ ("(apply|generate|check|copy|move)\([^())]*\)", "\\1()"), @@ -161,7 +162,7 @@ def compare_output_impl( "\n".join( determinize_text( result.stderr.decode(), - ignore_patterns=version_patterns, + ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, ) ) @@ -171,13 +172,13 @@ def compare_output_impl( 
result_stdout_processed = determinize_json_text(result.stdout.decode()) result_stderr_processed = determinize_text( result.stderr.decode(), - ignore_patterns=version_patterns, + ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, ) expected_stdout_processed = determinize_json_text(open(expected_stdout).read()) expected_stderr_processed = determinize_text( open(expected_stderr).read(), - ignore_patterns=version_patterns, + ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, ) failed = False From 9cd278ce9aee94e4a70db750390b68aa6b3d93a2 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 17 Aug 2023 21:32:01 +0200 Subject: [PATCH 06/13] update matrix outputs --- benchmark/test/reference/conversion.matrix.stderr | 6 +----- benchmark/test/reference/conversion.matrix.stdout | 3 +-- .../test/reference/distributed_solver.matrix.stderr | 9 +-------- .../test/reference/distributed_solver.matrix.stdout | 3 +-- benchmark/test/reference/matrix_statistics.matrix.stderr | 6 +----- benchmark/test/reference/matrix_statistics.matrix.stdout | 3 +-- benchmark/test/reference/preconditioner.matrix.stderr | 6 +----- benchmark/test/reference/preconditioner.matrix.stdout | 3 +-- benchmark/test/reference/solver.matrix.stderr | 9 +-------- benchmark/test/reference/solver.matrix.stdout | 3 +-- benchmark/test/reference/sparse_blas.matrix.stderr | 6 +----- benchmark/test/reference/sparse_blas.matrix.stdout | 3 +-- benchmark/test/reference/spmv.matrix.stderr | 6 +----- benchmark/test/reference/spmv.matrix.stdout | 3 +-- benchmark/test/test_framework.py.in | 1 + 15 files changed, 15 insertions(+), 55 deletions(-) diff --git a/benchmark/test/reference/conversion.matrix.stderr b/benchmark/test/reference/conversion.matrix.stderr index 369a363a53e..5e7bd1cce24 100644 --- a/benchmark/test/reference/conversion.matrix.stderr +++ b/benchmark/test/reference/conversion.matrix.stderr @@ -4,11 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 
running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case -{ - "filename": "", - "conversion": {} -} +Running test case Matrix is of size (36, 36), 208 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/conversion.matrix.stdout b/benchmark/test/reference/conversion.matrix.stdout index 7e537fa4919..7f27b0c25b3 100644 --- a/benchmark/test/reference/conversion.matrix.stdout +++ b/benchmark/test/reference/conversion.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -28,4 +27,4 @@ "cols": 36, "nonzeros": 208 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/distributed_solver.matrix.stderr b/benchmark/test/reference/distributed_solver.matrix.stderr index 4f0c6b22edd..cd2bb49261c 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stderr +++ b/benchmark/test/reference/distributed_solver.matrix.stderr @@ -5,13 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "filename": "", - "optimal": { - "spmv": "csr-csr" - }, - "solver": {} -} +Running test case Matrix is of size (36, 36) Running solver: cg diff --git a/benchmark/test/reference/distributed_solver.matrix.stdout b/benchmark/test/reference/distributed_solver.matrix.stdout index cd3c7b8bd43..ec1d258e2f4 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stdout +++ b/benchmark/test/reference/distributed_solver.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -55,4 +54,4 @@ "rows": 36, "cols": 36 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/matrix_statistics.matrix.stderr b/benchmark/test/reference/matrix_statistics.matrix.stderr index 7bb33842f25..0b31ef3a888 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stderr +++ 
b/benchmark/test/reference/matrix_statistics.matrix.stderr @@ -1,8 +1,4 @@ This is Ginkgo 1.7.0 (develop) running with core module 1.7.0 (develop) -Running test case -{ - "filename": "", - "problem": {} -} +Running test case Matrix is of size (36, 36), 208 diff --git a/benchmark/test/reference/matrix_statistics.matrix.stdout b/benchmark/test/reference/matrix_statistics.matrix.stdout index ea73587fde4..a6297e89b66 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stdout +++ b/benchmark/test/reference/matrix_statistics.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -37,4 +36,4 @@ "cols": 36, "nonzeros": 208 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/preconditioner.matrix.stderr b/benchmark/test/reference/preconditioner.matrix.stderr index 4088a20c925..7452ab91b3a 100644 --- a/benchmark/test/reference/preconditioner.matrix.stderr +++ b/benchmark/test/reference/preconditioner.matrix.stderr @@ -4,10 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case -{ - "filename": "", - "preconditioner": {} -} +Running test case Matrix is of size (36, 36), 208 Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner.matrix.stdout b/benchmark/test/reference/preconditioner.matrix.stdout index 0415a87ea8d..51adb7383c3 100644 --- a/benchmark/test/reference/preconditioner.matrix.stdout +++ b/benchmark/test/reference/preconditioner.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -29,4 +28,4 @@ "cols": 36, "nonzeros": 208 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/solver.matrix.stderr b/benchmark/test/reference/solver.matrix.stderr index 8a1ea117314..cd2bb49261c 100644 --- a/benchmark/test/reference/solver.matrix.stderr +++ b/benchmark/test/reference/solver.matrix.stderr @@ -5,13 +5,6 @@ Running with 2 warm iterations and 1 running iterations The 
random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "filename": "", - "optimal": { - "spmv": "csr" - }, - "solver": {} -} +Running test case Matrix is of size (36, 36) Running solver: cg diff --git a/benchmark/test/reference/solver.matrix.stdout b/benchmark/test/reference/solver.matrix.stdout index 56577288c2d..a87e78f7f66 100644 --- a/benchmark/test/reference/solver.matrix.stdout +++ b/benchmark/test/reference/solver.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -53,4 +52,4 @@ "rows": 36, "cols": 36 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/sparse_blas.matrix.stderr b/benchmark/test/reference/sparse_blas.matrix.stderr index ff52b6a3269..483429fd71d 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stderr +++ b/benchmark/test/reference/sparse_blas.matrix.stderr @@ -4,10 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case -{ - "filename": "", - "sparse_blas": {} -} +Running test case Matrix is of size (36, 36), 208 Running sparse_blas: transpose diff --git a/benchmark/test/reference/sparse_blas.matrix.stdout b/benchmark/test/reference/sparse_blas.matrix.stdout index 4a64c8ea1ce..74fdbf98e7a 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stdout +++ b/benchmark/test/reference/sparse_blas.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -22,4 +21,4 @@ "cols": 36, "nonzeros": 208 } -] +] \ No newline at end of file diff --git a/benchmark/test/reference/spmv.matrix.stderr b/benchmark/test/reference/spmv.matrix.stderr index a618da5b321..45beba6cafb 100644 --- a/benchmark/test/reference/spmv.matrix.stderr +++ b/benchmark/test/reference/spmv.matrix.stderr @@ -5,10 +5,6 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The 
formats are coo The number of right hand sides is 1 -Running test case -{ - "filename": "", - "spmv": {} -} +Running test case Matrix is of size (36, 36), 208 Running spmv: coo diff --git a/benchmark/test/reference/spmv.matrix.stdout b/benchmark/test/reference/spmv.matrix.stdout index dc30ab6b284..4d03ce3cd07 100644 --- a/benchmark/test/reference/spmv.matrix.stdout +++ b/benchmark/test/reference/spmv.matrix.stdout @@ -1,4 +1,3 @@ - [ { "filename": "", @@ -18,4 +17,4 @@ "spmv": "coo" } } -] +] \ No newline at end of file diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 014d3cb41a5..6e3092bde6c 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -153,6 +153,7 @@ def compare_output_impl( typename_patterns = [ ("(apply|generate|check|copy|move)\([^())]*\)", "\\1()"), ("what\\(\\): .*", "what(): "), + (re.escape(str(matrixpath)), ""), ] if generate: open(expected_stdout, "w").write( From 8adf765865c03f261c155ed1e1db50550b7eef2c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 19 Aug 2023 11:27:22 +0200 Subject: [PATCH 07/13] review updates - rename 'determinize' -> 'sanitize' - use empty struct for empty benchmark state - use version tag instead of commit ID - use std::endl where appropriate Co-authored-by: Marcel Koch --- .../matrix_statistics/matrix_statistics.cpp | 5 +++- benchmark/test/test_framework.py.in | 30 +++++++++++-------- benchmark/utils/general.hpp | 2 +- third_party/nlohmann_json/CMakeLists.txt | 2 +- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 40c505c7627..4bb63032550 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -149,7 +149,10 @@ void extract_matrix_statistics(gko::matrix_data& data, using Generator = DefaultSystemGenerator; -struct MatrixStatistics : Benchmark 
{ +struct empty_state {}; + + +struct MatrixStatistics : Benchmark { std::string name; std::vector empty; diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 6e3092bde6c..1a07818df1f 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -22,7 +22,8 @@ denumberify_paths = [ "rhs_norm", "max_relative_norm2", ] -detypenameify_key_starts = ["generate(", "apply(", "advanced_apply(", "copy(", "check("] +detypenameify_key_starts = [ + "generate(", "apply(", "advanced_apply(", "copy(", "check("] empty_string_paths = ["filename"] empty_array_paths = [ "recurrent_residuals", @@ -44,7 +45,7 @@ def sanitize_json_key(key: str): return key -def sanitize_json_key_value(key: str, value, sanitize_all: bool): +def sanitize_json_value(key: str, value, sanitize_all: bool): """Applies sanitation to a single key-value pair. Strings with a key in empty_string_paths will be emptied @@ -72,7 +73,7 @@ def sanitize_json(parsed_input, sanitize_all: bool = False): if isinstance(parsed_input, dict): return { - sanitize_json_key(key): sanitize_json_key_value(key, value, sanitize_all) + sanitize_json_key(key): sanitize_json_value(key, value, sanitize_all) for key, value in parsed_input.items() } elif isinstance(parsed_input, list): @@ -83,7 +84,7 @@ def sanitize_json(parsed_input, sanitize_all: bool = False): return parsed_input -def determinize_json_text(input: str) -> List[str]: +def sanitize_json_text(input: str) -> List[str]: """Sanitizes the given input JSON string. The JSON values will be parsed and sanitized through sanitize_json(...) 
@@ -94,7 +95,7 @@ def determinize_json_text(input: str) -> List[str]: return result.splitlines() -def determinize_text( +def sanitize_text( input: str, ignore_patterns: List[str], replace_patterns: List[Tuple[str, str]], @@ -157,11 +158,11 @@ def compare_output_impl( ] if generate: open(expected_stdout, "w").write( - "\n".join(determinize_json_text(result.stdout.decode())) + "\n".join(sanitize_json_text(result.stdout.decode())) ) open(expected_stderr, "w").write( "\n".join( - determinize_text( + sanitize_text( result.stderr.decode(), ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, @@ -170,14 +171,15 @@ def compare_output_impl( ) print("GENERATED") return - result_stdout_processed = determinize_json_text(result.stdout.decode()) - result_stderr_processed = determinize_text( + result_stdout_processed = sanitize_json_text(result.stdout.decode()) + result_stderr_processed = sanitize_text( result.stderr.decode(), ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, ) - expected_stdout_processed = determinize_json_text(open(expected_stdout).read()) - expected_stderr_processed = determinize_text( + expected_stdout_processed = sanitize_json_text( + open(expected_stdout).read()) + expected_stderr_processed = sanitize_text( open(expected_stderr).read(), ignore_patterns=ignore_patterns, replace_patterns=typename_patterns, @@ -187,7 +189,8 @@ def compare_output_impl( print("FAIL: stdout differs") print( "\n".join( - difflib.unified_diff(expected_stdout_processed, result_stdout_processed) + difflib.unified_diff( + expected_stdout_processed, result_stdout_processed) ) ) failed = True @@ -195,7 +198,8 @@ def compare_output_impl( print("FAIL: stderr differs") print( "\n".join( - difflib.unified_diff(expected_stderr_processed, result_stderr_processed) + difflib.unified_diff( + expected_stderr_processed, result_stderr_processed) ) ) failed = True diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 1c48680f883..550f6fe2720 
100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -216,7 +216,7 @@ void print_general_information(const std::string& extra) } std::clog << "The random seed for right hand sides is " << FLAGS_seed << '\n' - << extra << '\n'; + << extra << std::endl; } diff --git a/third_party/nlohmann_json/CMakeLists.txt b/third_party/nlohmann_json/CMakeLists.txt index 77064c66c40..b95cfa5606a 100644 --- a/third_party/nlohmann_json/CMakeLists.txt +++ b/third_party/nlohmann_json/CMakeLists.txt @@ -3,7 +3,7 @@ include(FetchContent) FetchContent_Declare( nlohmann_json GIT_REPOSITORY https://github.com/nlohmann/json.git - GIT_TAG bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d + GIT_TAG v3.9.1 ) set(JSON_BuildTests OFF CACHE INTERNAL "") FetchContent_MakeAvailable(nlohmann_json) From 8d52ec8a29c4c03af4d4cdbdf7d9dae06ccd64d7 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 19 Aug 2023 12:19:00 +0200 Subject: [PATCH 08/13] annotate repetitions --- benchmark/blas/blas_common.hpp | 17 +++++++----- benchmark/conversion/conversion.cpp | 26 +++++++++++++------ .../matrix_statistics/matrix_statistics.cpp | 9 ++++--- benchmark/preconditioner/preconditioner.cpp | 13 +++++++--- benchmark/solver/solver_common.hpp | 21 +++++++++------ benchmark/sparse_blas/sparse_blas.cpp | 17 +++++++----- benchmark/spmv/spmv_common.hpp | 16 +++++++----- benchmark/utils/general.hpp | 26 +++++++++++++++++++ benchmark/utils/runner.hpp | 16 ++++-------- 9 files changed, 109 insertions(+), 52 deletions(-) diff --git a/benchmark/blas/blas_common.hpp b/benchmark/blas/blas_common.hpp index 88819a043b0..1267dc57c15 100644 --- a/benchmark/blas/blas_common.hpp +++ b/benchmark/blas/blas_common.hpp @@ -489,7 +489,8 @@ struct BlasBenchmark : Benchmark { void run(std::shared_ptr exec, std::shared_ptr timer, - dimensions& dims, const std::string& operation_name, + annotate_functor annotate, dimensions& dims, + const std::string& operation_name, json& operation_case) const override { auto op = 
operation_map.at(operation_name)(exec, dims); @@ -497,16 +498,20 @@ struct BlasBenchmark : Benchmark { IterationControl ic(timer); // warm run - for (auto _ : ic.warmup_run()) { - op->prepare(); - exec->synchronize(); - op->run(); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + op->prepare(); + exec->synchronize(); + op->run(); + exec->synchronize(); + } } // timed run op->prepare(); for (auto _ : ic.run()) { + auto range = annotate("repetition"); op->run(); } const auto runtime = ic.compute_time(FLAGS_timer_method); diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp index 5f03cb2b933..c777db1a35a 100644 --- a/benchmark/conversion/conversion.cpp +++ b/benchmark/conversion/conversion.cpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" +#include "benchmark/utils/general.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/iteration_control.hpp" @@ -128,6 +129,7 @@ struct ConversionBenchmark : Benchmark> { void run(std::shared_ptr exec, std::shared_ptr timer, + annotate_functor annotate, gko::device_matrix_data& data, const std::string& operation_name, json& operation_case) const override @@ -142,13 +144,17 @@ struct ConversionBenchmark : Benchmark> { IterationControl ic{timer}; if (to_name == "read") { // warm run - for (auto _ : ic.warmup_run()) { - exec->synchronize(); - readable->read(data); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + readable->read(data); + exec->synchronize(); + } } // timed run for (auto _ : ic.run()) { + auto range = annotate("repetition"); readable->read(data); } } else { @@ -156,13 +162,17 @@ struct ConversionBenchmark : Benchmark> { auto mtx_to = formats::matrix_type_factory.at(to_name)(exec); // warm 
run - for (auto _ : ic.warmup_run()) { - exec->synchronize(); - mtx_to->copy_from(mtx_from); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + mtx_to->copy_from(mtx_from); + exec->synchronize(); + } } // timed run for (auto _ : ic.run()) { + auto range = annotate("repetition"); mtx_to->copy_from(mtx_from); } } diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 4bb63032550..20feecf5ccf 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -182,8 +182,8 @@ struct MatrixStatistics : Benchmark { return Generator::describe_config(test_case); } - int setup(std::shared_ptr exec, - json& test_case) const override + empty_state setup(std::shared_ptr exec, + json& test_case) const override { auto data = Generator::generate_matrix_data(test_case); std::clog << "Matrix is of size (" << data.size[0] << ", " @@ -193,12 +193,13 @@ struct MatrixStatistics : Benchmark { test_case["nonzeros"] = data.nonzeros.size(); extract_matrix_statistics(data, test_case["problem"]); - return 0; + return {}; } void run(std::shared_ptr exec, std::shared_ptr timer, - int& data, const std::string& operation_name, + annotate_functor annotate, empty_state& data, + const std::string& operation_name, json& operation_case) const override {} }; diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 7c130328d34..98f116f9b12 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/formats.hpp" +#include "benchmark/utils/general.hpp" #include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/iteration_control.hpp" @@ -199,7 +200,7 @@ struct PreconditionerBenchmark : Benchmark { void run(std::shared_ptr exec, std::shared_ptr timer, - preconditioner_benchmark_state& state, + annotate_functor annotate, preconditioner_benchmark_state& state, const std::string& encoded_precond_name, json& precond_case) const override { @@ -219,12 +220,17 @@ struct PreconditionerBenchmark : Benchmark { auto precond = precond_factory.at(decoded_precond_name)(exec); - for (auto _ : ic_apply.warmup_run()) { - precond->generate(state.system_matrix)->apply(state.b, x_clone); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic_apply.warmup_run()) { + precond->generate(state.system_matrix) + ->apply(state.b, x_clone); + } } std::unique_ptr precond_op; for (auto _ : ic_gen.run()) { + auto range = annotate("repetition generate"); precond_op = precond->generate(state.system_matrix); } @@ -234,6 +240,7 @@ struct PreconditionerBenchmark : Benchmark { ic_gen.get_num_repetitions(); for (auto _ : ic_apply.run()) { + auto range = annotate("repetition apply"); precond_op->apply(state.b, x_clone); } diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index 4976e5759d4..597ab76729a 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -458,6 +458,7 @@ struct SolverBenchmark : Benchmark> { void run(std::shared_ptr exec, std::shared_ptr timer, + annotate_functor annotate, solver_benchmark_state& state, const std::string& encoded_solver_name, json& solver_case) const override @@ -482,14 +483,17 @@ struct SolverBenchmark : Benchmark> { // warm run std::shared_ptr solver; - for (auto _ : ic.warmup_run()) { - auto x_clone = clone(state.x); - auto precond = precond_factory.at(precond_name)(exec); - solver = 
generate_solver(exec, give(precond), solver_name, - FLAGS_warmup_max_iters) - ->generate(state.system_matrix); - solver->apply(state.b, x_clone); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + auto x_clone = clone(state.x); + auto precond = precond_factory.at(precond_name)(exec); + solver = generate_solver(exec, give(precond), solver_name, + FLAGS_warmup_max_iters) + ->generate(state.system_matrix); + solver->apply(state.b, x_clone); + exec->synchronize(); + } } // detail run @@ -566,6 +570,7 @@ struct SolverBenchmark : Benchmark> { auto apply_timer = ic.get_timer(); auto x_clone = clone(state.x); for (auto status : ic.run(false)) { + auto range = annotate("repetition"); x_clone = clone(state.x); exec->synchronize(); diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 21df4d9c448..5d479eb7fc0 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -128,7 +128,8 @@ struct SparseBlasBenchmark : Benchmark> { void run(std::shared_ptr exec, std::shared_ptr timer, - std::unique_ptr& mtx, const std::string& operation_name, + annotate_functor annotate, std::unique_ptr& mtx, + const std::string& operation_name, json& operation_case) const override { auto op = get_operation(operation_name, mtx.get()); @@ -136,16 +137,20 @@ struct SparseBlasBenchmark : Benchmark> { IterationControl ic(timer); // warm run - for (auto _ : ic.warmup_run()) { - op->prepare(); - exec->synchronize(); - op->run(); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + op->prepare(); + exec->synchronize(); + op->run(); + exec->synchronize(); + } } // timed run op->prepare(); for (auto _ : ic.run()) { + auto range = annotate("repetition"); op->run(); } const auto runtime = ic.compute_time(FLAGS_timer_method); diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp 
index 4a7d014de8b..f589077834e 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -130,7 +130,7 @@ struct SpmvBenchmark : Benchmark> { } void run(std::shared_ptr exec, std::shared_ptr timer, - spmv_benchmark_state& state, + annotate_functor annotate, spmv_benchmark_state& state, const std::string& format_name, json& format_case) const override { auto system_matrix = generator.generate_matrix_with_format( @@ -149,11 +149,14 @@ struct SpmvBenchmark : Benchmark> { IterationControl ic{timer}; // warm run - for (auto _ : ic.warmup_run()) { - auto x_clone = clone(state.x); - exec->synchronize(); - system_matrix->apply(state.b, x_clone); - exec->synchronize(); + { + auto range = annotate("warmup", FLAGS_warmup > 0); + for (auto _ : ic.warmup_run()) { + auto x_clone = clone(state.x); + exec->synchronize(); + system_matrix->apply(state.b, x_clone); + exec->synchronize(); + } } // tuning run @@ -192,6 +195,7 @@ struct SpmvBenchmark : Benchmark> { // timed run auto x_clone = clone(state.x); for (auto _ : ic.run()) { + auto range = annotate("repetition"); system_matrix->apply(state.b, x_clone); } format_case["time"] = ic.compute_time(FLAGS_timer_method); diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 550f6fe2720..6012cb6c77b 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -245,6 +245,32 @@ std::shared_ptr create_profiler_hook( } +struct annotate_functor { + gko::log::profiling_scope_guard operator()(const char* name) const + { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + } + + gko::log::profiling_scope_guard operator()(const char* name, + bool should_annotate) const + { + if (profiler_hook && should_annotate) { + return profiler_hook->user_range(name); + } + return {}; + } + + annotate_functor(std::shared_ptr profiler_hook) + : profiler_hook{std::move(profiler_hook)} + {} + + std::shared_ptr profiler_hook; +}; + + // Returns a random number engine 
std::default_random_engine& get_engine() { diff --git a/benchmark/utils/runner.hpp b/benchmark/utils/runner.hpp index 661c403706f..264dc3965db 100644 --- a/benchmark/utils/runner.hpp +++ b/benchmark/utils/runner.hpp @@ -102,8 +102,8 @@ struct Benchmark { /** Runs a single operation of the benchmark */ virtual void run(std::shared_ptr exec, - std::shared_ptr timer, State& state, - const std::string& operation, + std::shared_ptr timer, annotate_functor annotate, + State& state, const std::string& operation, json& operation_case) const = 0; /** Post-process test case info. */ @@ -139,13 +139,7 @@ void run_test_cases(const Benchmark& benchmark, if (profiler_hook) { exec->add_logger(profiler_hook); } - auto annotate = - [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - }; + auto annotate = annotate_functor(profiler_hook); for (auto& test_case : test_cases) { try { @@ -174,8 +168,8 @@ void run_test_cases(const Benchmark& benchmark, auto& operation_case = benchmark_case[operation_name]; try { auto operation_range = annotate(operation_name.c_str()); - benchmark.run(exec, timer, test_case_state, operation_name, - operation_case); + benchmark.run(exec, timer, annotate, test_case_state, + operation_name, operation_case); operation_case["completed"] = true; } catch (const std::exception& e) { operation_case["completed"] = false; From e2f29961e909163af01ba63e5b5cf1c41e64cc5c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sat, 19 Aug 2023 12:19:10 +0200 Subject: [PATCH 09/13] update test output --- benchmark/test/reference/blas.profile.stderr | 6 ++++++ benchmark/test/reference/conversion.profile.stderr | 8 ++++++++ .../test/reference/distributed_solver.profile.stderr | 2 ++ .../reference/multi_vector_distributed.profile.stderr | 6 ++++++ benchmark/test/reference/preconditioner.profile.stderr | 4 ++++ benchmark/test/reference/solver.profile.stderr | 2 ++ 
benchmark/test/reference/sparse_blas.profile.stderr | 2 ++ benchmark/test/reference/spmv.profile.stderr | 2 ++ benchmark/test/reference/spmv_distributed.profile.stderr | 2 ++ 9 files changed, 34 insertions(+) diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index 529fc16009c..7307fb0ad7e 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -10,8 +10,10 @@ DEBUG: begin n = 100 DEBUG: begin copy DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::copy DEBUG: end dense::copy +DEBUG: end repetition DEBUG: end copy Running blas: axpy DEBUG: begin axpy @@ -21,8 +23,10 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::add_scaled DEBUG: end dense::add_scaled +DEBUG: end repetition DEBUG: end axpy Running blas: scal DEBUG: begin scal @@ -30,7 +34,9 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::scale DEBUG: end dense::scale +DEBUG: end repetition DEBUG: end scal DEBUG: end n = 100 diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index a233579c721..3a4301b13eb 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -11,12 +11,14 @@ DEBUG: end components::aos_to_soa DEBUG: begin stencil(100,7pt) Running conversion: coo-read DEBUG: begin coo-read +DEBUG: begin repetition DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy DEBUG: begin copy DEBUG: end copy +DEBUG: end repetition DEBUG: end coo-read Running conversion: coo-csr DEBUG: begin coo-csr @@ -28,6 +30,7 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: begin 
repetition DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy @@ -36,11 +39,13 @@ DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: end copy() +DEBUG: end repetition DEBUG: end coo-csr Running conversion: csr-read DEBUG: begin csr-read DEBUG: begin components::fill_array DEBUG: end components::fill_array +DEBUG: begin repetition DEBUG: begin copy DEBUG: end copy DEBUG: begin copy @@ -49,6 +54,7 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs +DEBUG: end repetition DEBUG: end csr-read Running conversion: csr-coo DEBUG: begin csr-coo @@ -62,6 +68,7 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin repetition DEBUG: begin copy() DEBUG: begin copy DEBUG: end copy @@ -70,5 +77,6 @@ DEBUG: end copy DEBUG: begin components::convert_ptrs_to_idxs DEBUG: end components::convert_ptrs_to_idxs DEBUG: end copy() +DEBUG: end repetition DEBUG: end csr-coo DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index 4ea20730117..227737e56b3 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -90,6 +90,7 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() +DEBUG: begin repetition DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy @@ -420,6 +421,7 @@ DEBUG: end check() DEBUG: end check() DEBUG: end iteration DEBUG: end apply() +DEBUG: end repetition DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr index 102330e38f4..85bd138514b 100644 --- 
a/benchmark/test/reference/multi_vector_distributed.profile.stderr +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -46,8 +46,10 @@ DEBUG: begin copy DEBUG: end copy DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::copy DEBUG: end dense::copy +DEBUG: end repetition DEBUG: end copy Running blas: axpy DEBUG: begin axpy @@ -93,8 +95,10 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::add_scaled DEBUG: end dense::add_scaled +DEBUG: end repetition DEBUG: end axpy Running blas: scal DEBUG: begin scal @@ -120,7 +124,9 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill DEBUG: end dense::fill +DEBUG: begin repetition DEBUG: begin dense::scale DEBUG: end dense::scale +DEBUG: end repetition DEBUG: end scal DEBUG: end n = 100 diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index 610dfe464ec..e2069c318d2 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -31,13 +31,17 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() +DEBUG: begin repetition generate DEBUG: begin generate() DEBUG: end generate() +DEBUG: end repetition generate +DEBUG: begin repetition apply DEBUG: begin apply() DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() DEBUG: end apply() +DEBUG: end repetition apply DEBUG: end none DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 238591eb0c9..5e1e2cdb312 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -34,6 +34,7 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end 
copy() +DEBUG: begin repetition DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy @@ -282,6 +283,7 @@ DEBUG: end check() DEBUG: end check() DEBUG: end iteration DEBUG: end apply() +DEBUG: end repetition DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index 60cf41ccbae..fd991de7063 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -15,9 +15,11 @@ DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin stencil(100,7pt) Running sparse_blas: transpose DEBUG: begin transpose +DEBUG: begin repetition DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin csr::transpose DEBUG: end csr::transpose +DEBUG: end repetition DEBUG: end transpose DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 2299614c6c4..1cc24a5f186 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -28,9 +28,11 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() +DEBUG: begin repetition DEBUG: begin apply() DEBUG: begin coo::spmv DEBUG: end coo::spmv DEBUG: end apply() +DEBUG: end repetition DEBUG: end coo DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr index b44cef7f3f6..f0d28332ef0 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -122,6 +122,7 @@ DEBUG: begin copy() DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() +DEBUG: begin repetition DEBUG: begin apply() DEBUG: begin dense::row_gather DEBUG: end dense::row_gather @@ -134,5 +135,6 @@ DEBUG: begin 
csr::advanced_spmv DEBUG: end csr::advanced_spmv DEBUG: end advanced_apply() DEBUG: end apply() +DEBUG: end repetition DEBUG: end csr-csr DEBUG: end stencil(100,7pt,stencil) From 49ffd96d68d39c4a80e97f72ff9c43923b856a3c Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 22 Aug 2023 17:52:30 +0200 Subject: [PATCH 10/13] update documentation --- ABOUT-LICENSING.md | 105 +++----------------------------------- INSTALL.md | 6 +-- benchmark/CMakeLists.txt | 4 +- dev_tools/scripts/regroup | 2 +- 4 files changed, 14 insertions(+), 103 deletions(-) diff --git a/ABOUT-LICENSING.md b/ABOUT-LICENSING.md index df081e2211b..d6e68911d1a 100644 --- a/ABOUT-LICENSING.md +++ b/ABOUT-LICENSING.md @@ -76,7 +76,7 @@ the following license: When compiling Ginkgo with `-DGINKGO_BUILD_BENCHMARKS=ON` the build system will download, build, and link [gflags](https://github.com/gflags/gflags) and -[RapidJSON](https://github.com/Tencent/rapidjson) with the +[nlohmann-json](https://github.com/nlohmann/json) with the benchmark suites. gtest is available under the following license: > Copyright (c) 2006, Google Inc. @@ -108,110 +108,22 @@ benchmark suites. gtest is available under the following license: > (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE > OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -RapidJSON is available under the following license (note that Ginkgo's build -system automatically removes the `bin/jsonchecker/` directory which is licensed -under the problematic JSON license): +nlohmann-json is available under the following license: -> Tencent is pleased to support the open source community by making RapidJSON -> available. -> -> Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All -> rights reserved. -> -> If you have downloaded a copy of the RapidJSON binary from Tencent, please -> note that the RapidJSON binary is licensed under the MIT License. 
If you have -> downloaded a copy of the RapidJSON source code from Tencent, please note that -> RapidJSON source code is licensed under the MIT License, except for the -> third-party components listed below which are subject to different license -> terms. Your integration of RapidJSON into your own projects may require -> compliance with the MIT License, as well as the other licenses applicable to -> the third-party components included within RapidJSON. To avoid the problematic -> JSON license in your own projects, it's sufficient to exclude the -> bin/jsonchecker/ directory, as it's the only code under the JSON license. A -> copy of the MIT License is included in this file. -> -> Other dependencies and licenses: -> -> Open Source Software Licensed Under the BSD License: -> -------------------------------------------------------------------- -> -> The msinttypes r29 -> -> Copyright (c) 2006-2013 Alexander Chemeris -> All rights reserved. -> -> Redistribution and use in source and binary forms, with or without -> modification, are permitted provided that the following conditions are met: -> -> * Redistributions of source code must retain the above copyright notice, this -> list of conditions and the following disclaimer. -> * Redistributions in binary form must reproduce the above copyright notice, -> this list of conditions and the following disclaimer in the documentation -> and/or other materials provided with the distribution. -> * Neither the name of copyright holder nor the names of its contributors may -> be used to endorse or promote products derived from this software without -> specific prior written permission. -> -> THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY -> EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -> WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -> DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY -> DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -> (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -> LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -> ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -> (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -> SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -> -> Open Source Software Licensed Under the JSON License: -> -------------------------------------------------------------------- -> -> json.org -> Copyright (c) 2002 -> JSON.org All Rights Reserved. -> -> JSON_checker -> Copyright (c) 2002 JSON.org -> All Rights Reserved. -> -> -> Terms of the JSON License: -> --------------------------------------------------- -> -> Permission is hereby granted, free of charge, to any person obtaining a copy -> of this software and associated documentation files (the "Software"), to deal -> in the Software without restriction, including without limitation the rights -> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -> copies of the Software, and to permit persons to whom the Software is -> furnished to do so, subject to the following conditions: -> -> The above copyright notice and this permission notice shall be included in all -> copies or substantial portions of the Software. -> -> The Software shall be used for Good, not Evil. -> -> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -> SOFTWARE. -> -> -> Terms of the MIT License: -> -------------------------------------------------------------------- -> +> MIT License +> +> Copyright (c) 2013-2022 Niels Lohmann +> > Permission is hereby granted, free of charge, to any person obtaining a copy > of this software and associated documentation files (the "Software"), to deal > in the Software without restriction, including without limitation the rights > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > copies of the Software, and to permit persons to whom the Software is > furnished to do so, subject to the following conditions: -> + > The above copyright notice and this permission notice shall be included in all > copies or substantial portions of the Software. -> +> > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -220,7 +132,6 @@ under the problematic JSON license): > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > SOFTWARE. - For generating the documentation of Ginkgo, some scripts from the deal.II library are used. You can refer to the `doc/` folder to see which files are a modified version of deal.II's documentation generation scripts. Additionally, diff --git a/INSTALL.md b/INSTALL.md index 5f788ed0e28..b29358d4eb6 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -31,7 +31,7 @@ Ginkgo adds the following additional switches to control what is being built: * `-DGINKGO_FAST_TESTS={ON, OFF}` reduces the input sizes for a few slow tests to speed them up, default is `OFF`. 
* `-DGINKGO_BUILD_BENCHMARKS={ON, OFF}` builds Ginkgo's benchmarks - (will download gflags and rapidjson), default is `ON`. + (will download gflags and nlohmann-json), default is `ON`. * `-DGINKGO_BUILD_EXAMPLES={ON, OFF}` builds Ginkgo's examples, default is `ON` * `-DGINKGO_BUILD_EXTLIB_EXAMPLE={ON, OFF}` builds the interfacing example with deal.II, default is `OFF`. @@ -205,7 +205,7 @@ packages can be turned off by disabling the relevant options. Test](https://github.com/google/googletest); + GINKGO_BUILD_BENCHMARKS=ON: For argument management we use [gflags](https://github.com/gflags/gflags) and for JSON parsing we use - [RapidJSON](https://github.com/Tencent/rapidjson); + [nlohmann-json](https://github.com/nlohmann/json); + GINKGO_DEVEL_TOOLS=ON: [git-cmake-format](https://github.com/gflegar/git-cmake-format) is our CMake helper for code formatting. @@ -224,7 +224,7 @@ packages can be turned off by disabling the relevant options. Ginkgo attempts to use pre-installed versions of these package if they match version requirements using `find_package`. Otherwise, the configuration step will download the files for each of the packages `GTest`, `gflags`, -`RapidJSON` and `hwloc` and build them internally. +`nlohmann-json` and `hwloc` and build them internally. Note that, if the external packages were not installed to the default location, the CMake option `-DCMAKE_PREFIX_PATH=` needs to be set to the diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index e993ee6cf0c..fd04620f595 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -46,7 +46,7 @@ endfunction() # Generates an executable for one precision. Each executable will be linked to -# `ginkgo`, `gflags` and `rapidjson`. +# `ginkgo`, `gflags` and `nlohmann-json`. 
# Note: This should only be used by `ginkgo_add_typed_benchmark_executables` # # \param name name for the executable to create (including type suffix) @@ -96,7 +96,7 @@ endfunction(ginkgo_add_single_benchmark_executable) # Generates an executable for each supported precision. Each executable will be -# linked to `ginkgo`, `gflags` and `rapidjson`. +# linked to `ginkgo`, `gflags` and `nlohmann-json`. # # \param name base-name for the executable to create # \param use_lib_linops Boolean indicating if linking against hipsparse/cusparse diff --git a/dev_tools/scripts/regroup b/dev_tools/scripts/regroup index 85eade99289..e35bd37efee 100644 --- a/dev_tools/scripts/regroup +++ b/dev_tools/scripts/regroup @@ -1,6 +1,6 @@ IncludeBlocks: Regroup IncludeCategories: - - Regex: '^<(rapidjson|gflags|gtest|papi).*' + - Regex: '^<(nlohmann|gflags|gtest|papi).*' Priority: 3 - Regex: '^<(omp|cu|hip|thrust|CL/|cooperative|oneapi|mpi|nvToolsExt).*' Priority: 2 From a725d3cd93a165037180b5696ae381b1bfa3229d Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 23 Aug 2023 13:18:37 +0200 Subject: [PATCH 11/13] review updates - remove unnecessary stdin in tests - simplify validate_config - consistently use pointer members instead of reference members Co-authored-by: Marcel Koch --- benchmark/solver/solver_common.hpp | 6 +--- benchmark/test/blas.py | 2 -- benchmark/test/multi_vector_distributed.py | 2 -- benchmark/utils/loggers.hpp | 34 +++++++++++----------- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index 597ab76729a..0248ab8e757 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -414,11 +414,7 @@ struct SolverBenchmark : Benchmark> { bool validate_config(const json& value) const override { - return ((value.contains("size") && value.contains("stencil") && - value["size"].is_number_integer() && - value["stencil"].is_string()) || - 
(value.contains("filename") && - value["filename"].is_string())) && + return generator.validate_config(value) && (value.contains("optimal") && value["optimal"].contains("spmv") && value["optimal"]["spmv"].is_string()); diff --git a/benchmark/test/blas.py b/benchmark/test/blas.py index 160d5364e20..ff5bddc5d08 100755 --- a/benchmark/test/blas.py +++ b/benchmark/test/blas.py @@ -22,7 +22,6 @@ ["-input", str(test_framework.sourcepath / "input.blas.json")], expected_stdout="blas.simple.stdout", expected_stderr="blas.simple.stderr", - stdin='[{"n": 100}]', ) # profiler annotations @@ -30,5 +29,4 @@ ["-input", '[{"n": 100}]', "-profile", "-profiler_hook", "debug"], expected_stdout="blas.profile.stdout", expected_stderr="blas.profile.stderr", - stdin='[{"n": 100}]', ) diff --git a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py index 1e0c4c8adf5..c62cb8ebd17 100644 --- a/benchmark/test/multi_vector_distributed.py +++ b/benchmark/test/multi_vector_distributed.py @@ -24,7 +24,6 @@ ["-input", str(test_framework.sourcepath / "input.blas.json")], expected_stdout="multi_vector_distributed.simple.stdout", expected_stderr="multi_vector_distributed.simple.stderr", - stdin='[{"n": 100}]', num_procs=3, ) @@ -33,6 +32,5 @@ ["-input", '[{"n": 100}]', "-profile", "-profiler_hook", "debug"], expected_stdout="multi_vector_distributed.profile.stdout", expected_stderr="multi_vector_distributed.profile.stderr", - stdin='[{"n": 100}]', num_procs=3, ) diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp index 1e651811f0f..89ea6108eda 100644 --- a/benchmark/utils/loggers.hpp +++ b/benchmark/utils/loggers.hpp @@ -179,16 +179,16 @@ struct ResidualLogger : gko::log::Logger { const gko::array* status, bool all_stopped) const override { - timestamps.push_back(std::chrono::duration( - std::chrono::steady_clock::now() - start) - .count()); + timestamps->push_back(std::chrono::duration( + std::chrono::steady_clock::now() - start) + .count()); 
if (residual_norm) { - rec_res_norms.push_back( + rec_res_norms->push_back( get_norm(gko::as>(residual_norm))); } else { gko::detail::vector_dispatch( residual, [&](const auto v_residual) { - rec_res_norms.push_back(compute_norm2(v_residual)); + rec_res_norms->push_back(compute_norm2(v_residual)); }); } if (solution) { @@ -196,18 +196,18 @@ struct ResidualLogger : gko::log::Logger { rc_vtype>(solution, [&](auto v_solution) { using concrete_type = std::remove_pointer_t>; - true_res_norms.push_back(compute_residual_norm( + true_res_norms->push_back(compute_residual_norm( matrix, gko::as(b), v_solution)); }); } else { - true_res_norms.push_back(-1.0); + true_res_norms->push_back(-1.0); } if (implicit_sq_residual_norm) { - implicit_res_norms.push_back(std::sqrt( + implicit_res_norms->push_back(std::sqrt( get_norm(gko::as>(implicit_sq_residual_norm)))); has_implicit_res_norm = true; } else { - implicit_res_norms.push_back(-1.0); + implicit_res_norms->push_back(-1.0); } } @@ -219,11 +219,11 @@ struct ResidualLogger : gko::log::Logger { matrix{matrix.get()}, b{b.get()}, start{std::chrono::steady_clock::now()}, - rec_res_norms{rec_res_norms}, - true_res_norms{true_res_norms}, + rec_res_norms{&rec_res_norms}, + true_res_norms{&true_res_norms}, has_implicit_res_norm{}, - implicit_res_norms{implicit_res_norms}, - timestamps{timestamps} + implicit_res_norms{&implicit_res_norms}, + timestamps{&timestamps} {} bool has_implicit_res_norms() const { return has_implicit_res_norm; } @@ -232,11 +232,11 @@ struct ResidualLogger : gko::log::Logger { const gko::LinOp* matrix; const gko::LinOp* b; std::chrono::steady_clock::time_point start; - json& rec_res_norms; - json& true_res_norms; + json* rec_res_norms; + json* true_res_norms; mutable bool has_implicit_res_norm; - json& implicit_res_norms; - json& timestamps; + json* implicit_res_norms; + json* timestamps; }; From 7b482dcf416e940b76c775ec67c0a39b286852aa Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 29 Aug 2023 19:24:34 +0200 
Subject: [PATCH 12/13] review updates - don't install nlohmann-json - simplify code - improve config description formatting Co-authored-by: Yuhsiang M. Tsai --- benchmark/preconditioner/preconditioner.cpp | 3 +-- benchmark/spmv/spmv_common.hpp | 3 --- benchmark/test/CMakeLists.txt | 2 +- benchmark/test/reference/conversion.all.stderr | 2 +- benchmark/test/reference/conversion.profile.stderr | 6 +++--- benchmark/test/reference/conversion.simple.stderr | 2 +- benchmark/test/reference/distributed_solver.profile.stderr | 6 +++--- benchmark/test/reference/distributed_solver.simple.stderr | 2 +- benchmark/test/reference/matrix_statistics.simple.stderr | 2 +- benchmark/test/reference/preconditioner.profile.stderr | 6 +++--- benchmark/test/reference/preconditioner.simple.stderr | 2 +- benchmark/test/reference/solver.profile.stderr | 6 +++--- benchmark/test/reference/solver.simple.stderr | 2 +- benchmark/test/reference/sparse_blas.profile.stderr | 6 +++--- benchmark/test/reference/sparse_blas.simple.stderr | 2 +- benchmark/test/reference/spmv.profile.stderr | 6 +++--- benchmark/test/reference/spmv.simple.stderr | 2 +- benchmark/test/reference/spmv_distributed.profile.stderr | 6 +++--- benchmark/test/reference/spmv_distributed.simple.stderr | 2 +- benchmark/utils/generator.hpp | 6 +++--- third_party/nlohmann_json/CMakeLists.txt | 1 + 21 files changed, 36 insertions(+), 39 deletions(-) diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 98f116f9b12..074fe202e6c 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -205,9 +205,8 @@ struct PreconditionerBenchmark : Benchmark { json& precond_case) const override { auto decoded_precond_name = precond_decoder.at(encoded_precond_name); - precond_case["generate"] = json::object(); - precond_case["apply"] = json::object(); for (auto stage : {"generate", "apply"}) { + precond_case[stage] = json::object(); 
precond_case[stage]["components"] = json::object(); } diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index f589077834e..c85642bb5f1 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -211,9 +211,6 @@ struct SpmvBenchmark : Benchmark> { std::string best_format; // find the fastest among all formats we tested for (const auto& format : formats) { - if (!test_case[name].contains(format)) { - continue; - } auto& format_case = test_case[name][format]; if (format_case.contains("completed") && format_case["completed"].template get()) { diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt index 1cd589927fa..2f43b6eaf71 100644 --- a/benchmark/test/CMakeLists.txt +++ b/benchmark/test/CMakeLists.txt @@ -25,4 +25,4 @@ if (GINKGO_BUILD_MPI) add_benchmark_test(multi_vector_distributed) add_benchmark_test(spmv_distributed) add_benchmark_test(solver_distributed) -endif() \ No newline at end of file +endif() diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index 77ff50a1b89..f6f1002e443 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -4,7 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr,ell,sellp,hybrid -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 3a4301b13eb..b25fb4d42ee 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -4,11 +4,11 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 
The formats are coo,csr -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running conversion: coo-read DEBUG: begin coo-read DEBUG: begin repetition @@ -79,4 +79,4 @@ DEBUG: end components::convert_ptrs_to_idxs DEBUG: end copy() DEBUG: end repetition DEBUG: end csr-coo -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/conversion.simple.stderr b/benchmark/test/reference/conversion.simple.stderr index 9b51effac09..53777a4fc53 100644 --- a/benchmark/test/reference/conversion.simple.stderr +++ b/benchmark/test/reference/conversion.simple.stderr @@ -4,7 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index 227737e56b3..e8ef115f8c2 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -5,7 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case stencil(100,7pt,stencil) +Running test case stencil(100, 7pt, stencil) DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size DEBUG: begin components::fill_array @@ -77,7 +77,7 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() Matrix is of size (125, 125) -DEBUG: begin 
stencil(100,7pt,stencil) +DEBUG: begin stencil(100, 7pt, stencil) Running solver: cg DEBUG: begin cg DEBUG: begin dense::compute_squared_norm2 @@ -445,4 +445,4 @@ DEBUG: end dense::compute_sqrt DEBUG: begin copy DEBUG: end copy DEBUG: end cg -DEBUG: end stencil(100,7pt,stencil) +DEBUG: end stencil(100, 7pt, stencil) diff --git a/benchmark/test/reference/distributed_solver.simple.stderr b/benchmark/test/reference/distributed_solver.simple.stderr index 607081a3949..bdf57c2d0e1 100644 --- a/benchmark/test/reference/distributed_solver.simple.stderr +++ b/benchmark/test/reference/distributed_solver.simple.stderr @@ -5,6 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case stencil(100,7pt,stencil) +Running test case stencil(100, 7pt, stencil) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr index d02edbc44da..bfaa411873e 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -1,4 +1,4 @@ This is Ginkgo 1.7.0 (develop) running with core module 1.7.0 (develop) -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index e2069c318d2..328a738583c 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -4,7 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) DEBUG: 
begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::aos_to_soa @@ -24,7 +24,7 @@ DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data Matrix is of size (125, 125), 725 -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running preconditioner: none DEBUG: begin none DEBUG: begin copy() @@ -44,4 +44,4 @@ DEBUG: end copy() DEBUG: end apply() DEBUG: end repetition apply DEBUG: end none -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr index 0090e180d2b..a428671486f 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -4,6 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 Running preconditioner: none diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 5e1e2cdb312..a9846dff61f 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -5,7 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) DEBUG: begin components::fill_array DEBUG: end components::fill_array DEBUG: begin components::aos_to_soa @@ -23,7 +23,7 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy() Matrix is of size (125, 125) -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running solver: cg DEBUG: begin cg DEBUG: 
begin dense::compute_norm2_dispatch @@ -297,4 +297,4 @@ DEBUG: end dense::compute_norm2_dispatch DEBUG: begin copy DEBUG: end copy DEBUG: end cg -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/solver.simple.stderr b/benchmark/test/reference/solver.simple.stderr index 659dd026588..d9c04b69cf5 100644 --- a/benchmark/test/reference/solver.simple.stderr +++ b/benchmark/test/reference/solver.simple.stderr @@ -5,6 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index fd991de7063..70a9299ccae 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -4,7 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 DEBUG: begin components::fill_array DEBUG: end components::fill_array @@ -12,7 +12,7 @@ DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running sparse_blas: transpose DEBUG: begin transpose DEBUG: begin repetition @@ -22,4 +22,4 @@ DEBUG: begin csr::transpose DEBUG: end csr::transpose DEBUG: end repetition DEBUG: end transpose -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/sparse_blas.simple.stderr 
b/benchmark/test/reference/sparse_blas.simple.stderr index 1f2bb34809f..fe6cf23d5b7 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -4,6 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) Matrix is of size (125, 125), 725 Running sparse_blas: transpose diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 1cc24a5f186..3c3ec3b7cfe 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -5,7 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case stencil(100,7pt) +Running test case stencil(100, 7pt) DEBUG: begin components::aos_to_soa DEBUG: end components::aos_to_soa DEBUG: begin dense::fill @@ -19,7 +19,7 @@ DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data Matrix is of size (125, 125), 725 -DEBUG: begin stencil(100,7pt) +DEBUG: begin stencil(100, 7pt) Running spmv: coo DEBUG: begin coo DEBUG: begin components::aos_to_soa @@ -35,4 +35,4 @@ DEBUG: end coo::spmv DEBUG: end apply() DEBUG: end repetition DEBUG: end coo -DEBUG: end stencil(100,7pt) +DEBUG: end stencil(100, 7pt) diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index 9d5047febb6..97fe670aff7 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -5,6 +5,6 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case stencil(100,7pt) +Running test 
case stencil(100, 7pt) Matrix is of size (125, 125), 725 Running spmv: coo diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr index f0d28332ef0..dc3cfd377c7 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -5,7 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are [csr]x[csr] The number of right hand sides is 1 -Running test case stencil(100,7pt,stencil) +Running test case stencil(100, 7pt, stencil) DEBUG: begin partition::build_ranges_from_global_size DEBUG: end partition::build_ranges_from_global_size DEBUG: begin components::fill_array @@ -55,7 +55,7 @@ DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data Matrix is of size (81, 81), 144 -DEBUG: begin stencil(100,7pt,stencil) +DEBUG: begin stencil(100, 7pt, stencil) Running spmv: csr-csr DEBUG: begin csr-csr DEBUG: begin partition::build_ranges_from_global_size @@ -137,4 +137,4 @@ DEBUG: end advanced_apply() DEBUG: end apply() DEBUG: end repetition DEBUG: end csr-csr -DEBUG: end stencil(100,7pt,stencil) +DEBUG: end stencil(100, 7pt, stencil) diff --git a/benchmark/test/reference/spmv_distributed.simple.stderr b/benchmark/test/reference/spmv_distributed.simple.stderr index 0df742d5b9b..7c7f6fccf54 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stderr +++ b/benchmark/test/reference/spmv_distributed.simple.stderr @@ -5,6 +5,6 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are [csr]x[csr] The number of right hand sides is 1 -Running test case stencil(100,7pt,stencil) +Running test case stencil(100, 7pt, stencil) Matrix is of size (81, 81), 144 Running spmv: csr-csr diff --git a/benchmark/utils/generator.hpp b/benchmark/utils/generator.hpp index 
257a2384634..3f26ed3f2fc 100644 --- a/benchmark/utils/generator.hpp +++ b/benchmark/utils/generator.hpp @@ -90,7 +90,7 @@ struct DefaultSystemGenerator { return config["filename"].get(); } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].get() << "," + ss << "stencil(" << config["size"].get() << ", " << config["stencil"].get() << ")"; return ss.str(); } else { @@ -231,8 +231,8 @@ struct DistributedDefaultSystemGenerator { return config["filename"].get(); } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].get() << "," - << config["stencil"].get() << "," + ss << "stencil(" << config["size"].get() << ", " + << config["stencil"].get() << ", " << config["comm_pattern"].get() << ")"; return ss.str(); } else { diff --git a/third_party/nlohmann_json/CMakeLists.txt b/third_party/nlohmann_json/CMakeLists.txt index b95cfa5606a..6f413e458b9 100644 --- a/third_party/nlohmann_json/CMakeLists.txt +++ b/third_party/nlohmann_json/CMakeLists.txt @@ -6,4 +6,5 @@ FetchContent_Declare( GIT_TAG v3.9.1 ) set(JSON_BuildTests OFF CACHE INTERNAL "") +set(JSON_Install OFF CACHE INTERNAL "") FetchContent_MakeAvailable(nlohmann_json) From fe3789ce2c66919109fd2a18d3f67ec8e0bddeb8 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 30 Aug 2023 11:30:08 +0200 Subject: [PATCH 13/13] keep trailing EOL --- benchmark/test/reference/blas.profile.stdout | 2 +- benchmark/test/reference/blas.simple.stdout | 2 +- benchmark/test/reference/conversion.all.stdout | 2 +- benchmark/test/reference/conversion.matrix.stdout | 2 +- benchmark/test/reference/conversion.profile.stdout | 2 +- benchmark/test/reference/conversion.simple.stdout | 2 +- benchmark/test/reference/distributed_solver.matrix.stdout | 2 +- benchmark/test/reference/distributed_solver.profile.stdout | 2 +- benchmark/test/reference/distributed_solver.simple.stdout | 2 +- benchmark/test/reference/matrix_statistics.matrix.stdout | 2 +- 
benchmark/test/reference/matrix_statistics.simple.stdout | 2 +- .../test/reference/multi_vector_distributed.profile.stdout | 2 +- .../test/reference/multi_vector_distributed.simple.stdout | 2 +- benchmark/test/reference/preconditioner.matrix.stdout | 2 +- benchmark/test/reference/preconditioner.profile.stdout | 2 +- benchmark/test/reference/preconditioner.simple.stdout | 2 +- benchmark/test/reference/solver.matrix.stdout | 2 +- benchmark/test/reference/solver.profile.stdout | 2 +- benchmark/test/reference/solver.simple.stdout | 2 +- benchmark/test/reference/sparse_blas.matrix.stdout | 2 +- benchmark/test/reference/sparse_blas.profile.stdout | 2 +- benchmark/test/reference/sparse_blas.simple.stdout | 2 +- benchmark/test/reference/spmv.matrix.stdout | 2 +- benchmark/test/reference/spmv.profile.stdout | 2 +- benchmark/test/reference/spmv.simple.stdout | 2 +- benchmark/test/reference/spmv_distributed.profile.stdout | 2 +- benchmark/test/reference/spmv_distributed.simple.stdout | 2 +- benchmark/test/test_framework.py.in | 3 ++- 28 files changed, 29 insertions(+), 28 deletions(-) diff --git a/benchmark/test/reference/blas.profile.stdout b/benchmark/test/reference/blas.profile.stdout index 8998d5eaed7..209e115b557 100644 --- a/benchmark/test/reference/blas.profile.stdout +++ b/benchmark/test/reference/blas.profile.stdout @@ -25,4 +25,4 @@ } } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/blas.simple.stdout b/benchmark/test/reference/blas.simple.stdout index a586a9bc57b..54745d81104 100644 --- a/benchmark/test/reference/blas.simple.stdout +++ b/benchmark/test/reference/blas.simple.stdout @@ -25,4 +25,4 @@ } } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/conversion.all.stdout b/benchmark/test/reference/conversion.all.stdout index 0c77d464793..e7a5b8f0f51 100644 --- a/benchmark/test/reference/conversion.all.stdout +++ b/benchmark/test/reference/conversion.all.stdout @@ -73,4 +73,4 @@ "cols": 125, "nonzeros": 725 } -] \ 
No newline at end of file +] diff --git a/benchmark/test/reference/conversion.matrix.stdout b/benchmark/test/reference/conversion.matrix.stdout index 7f27b0c25b3..8489e4b30b4 100644 --- a/benchmark/test/reference/conversion.matrix.stdout +++ b/benchmark/test/reference/conversion.matrix.stdout @@ -27,4 +27,4 @@ "cols": 36, "nonzeros": 208 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/conversion.profile.stdout b/benchmark/test/reference/conversion.profile.stdout index a9c3ea674fa..907eac5b951 100644 --- a/benchmark/test/reference/conversion.profile.stdout +++ b/benchmark/test/reference/conversion.profile.stdout @@ -28,4 +28,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/conversion.simple.stdout b/benchmark/test/reference/conversion.simple.stdout index 81c735789d1..91b69b8a248 100644 --- a/benchmark/test/reference/conversion.simple.stdout +++ b/benchmark/test/reference/conversion.simple.stdout @@ -28,4 +28,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/distributed_solver.matrix.stdout b/benchmark/test/reference/distributed_solver.matrix.stdout index ec1d258e2f4..67ac333bec5 100644 --- a/benchmark/test/reference/distributed_solver.matrix.stdout +++ b/benchmark/test/reference/distributed_solver.matrix.stdout @@ -54,4 +54,4 @@ "rows": 36, "cols": 36 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout index 55dfb1dc428..0a844879c4f 100644 --- a/benchmark/test/reference/distributed_solver.profile.stdout +++ b/benchmark/test/reference/distributed_solver.profile.stdout @@ -30,4 +30,4 @@ "rows": 125, "cols": 125 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/distributed_solver.simple.stdout b/benchmark/test/reference/distributed_solver.simple.stdout index eed8d864388..458115e6ab2 100644 
--- a/benchmark/test/reference/distributed_solver.simple.stdout +++ b/benchmark/test/reference/distributed_solver.simple.stdout @@ -56,4 +56,4 @@ "rows": 125, "cols": 125 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/matrix_statistics.matrix.stdout b/benchmark/test/reference/matrix_statistics.matrix.stdout index a6297e89b66..f5eba9461f7 100644 --- a/benchmark/test/reference/matrix_statistics.matrix.stdout +++ b/benchmark/test/reference/matrix_statistics.matrix.stdout @@ -36,4 +36,4 @@ "cols": 36, "nonzeros": 208 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/matrix_statistics.simple.stdout b/benchmark/test/reference/matrix_statistics.simple.stdout index 923bbc9f962..23124781a7d 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stdout +++ b/benchmark/test/reference/matrix_statistics.simple.stdout @@ -37,4 +37,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stdout b/benchmark/test/reference/multi_vector_distributed.profile.stdout index 8998d5eaed7..209e115b557 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stdout +++ b/benchmark/test/reference/multi_vector_distributed.profile.stdout @@ -25,4 +25,4 @@ } } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stdout b/benchmark/test/reference/multi_vector_distributed.simple.stdout index a586a9bc57b..54745d81104 100644 --- a/benchmark/test/reference/multi_vector_distributed.simple.stdout +++ b/benchmark/test/reference/multi_vector_distributed.simple.stdout @@ -25,4 +25,4 @@ } } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/preconditioner.matrix.stdout b/benchmark/test/reference/preconditioner.matrix.stdout index 51adb7383c3..742ec55c41d 100644 --- a/benchmark/test/reference/preconditioner.matrix.stdout +++ 
b/benchmark/test/reference/preconditioner.matrix.stdout @@ -28,4 +28,4 @@ "cols": 36, "nonzeros": 208 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout index e33a6502eea..526349b55ad 100644 --- a/benchmark/test/reference/preconditioner.profile.stdout +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -21,4 +21,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/preconditioner.simple.stdout b/benchmark/test/reference/preconditioner.simple.stdout index 06291228a1c..ed567dcbb13 100644 --- a/benchmark/test/reference/preconditioner.simple.stdout +++ b/benchmark/test/reference/preconditioner.simple.stdout @@ -29,4 +29,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/solver.matrix.stdout b/benchmark/test/reference/solver.matrix.stdout index a87e78f7f66..594a3887921 100644 --- a/benchmark/test/reference/solver.matrix.stdout +++ b/benchmark/test/reference/solver.matrix.stdout @@ -52,4 +52,4 @@ "rows": 36, "cols": 36 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout index 906c74de5e7..c132ed1a572 100644 --- a/benchmark/test/reference/solver.profile.stdout +++ b/benchmark/test/reference/solver.profile.stdout @@ -29,4 +29,4 @@ "rows": 125, "cols": 125 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/solver.simple.stdout b/benchmark/test/reference/solver.simple.stdout index 5d127fe4b78..0ee0e4b9a4b 100644 --- a/benchmark/test/reference/solver.simple.stdout +++ b/benchmark/test/reference/solver.simple.stdout @@ -53,4 +53,4 @@ "rows": 125, "cols": 125 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/sparse_blas.matrix.stdout b/benchmark/test/reference/sparse_blas.matrix.stdout index 
74fdbf98e7a..a50fa1159d9 100644 --- a/benchmark/test/reference/sparse_blas.matrix.stdout +++ b/benchmark/test/reference/sparse_blas.matrix.stdout @@ -21,4 +21,4 @@ "cols": 36, "nonzeros": 208 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/sparse_blas.profile.stdout b/benchmark/test/reference/sparse_blas.profile.stdout index e9d48fde23d..45cb7e2638a 100644 --- a/benchmark/test/reference/sparse_blas.profile.stdout +++ b/benchmark/test/reference/sparse_blas.profile.stdout @@ -15,4 +15,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/sparse_blas.simple.stdout b/benchmark/test/reference/sparse_blas.simple.stdout index 3cc5f774ebf..a44f4f189b2 100644 --- a/benchmark/test/reference/sparse_blas.simple.stdout +++ b/benchmark/test/reference/sparse_blas.simple.stdout @@ -22,4 +22,4 @@ "cols": 125, "nonzeros": 725 } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/spmv.matrix.stdout b/benchmark/test/reference/spmv.matrix.stdout index 4d03ce3cd07..ea5927ba148 100644 --- a/benchmark/test/reference/spmv.matrix.stdout +++ b/benchmark/test/reference/spmv.matrix.stdout @@ -17,4 +17,4 @@ "spmv": "coo" } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/spmv.profile.stdout b/benchmark/test/reference/spmv.profile.stdout index 409a92d4e33..6e4701af719 100644 --- a/benchmark/test/reference/spmv.profile.stdout +++ b/benchmark/test/reference/spmv.profile.stdout @@ -17,4 +17,4 @@ "spmv": "coo" } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/spmv.simple.stdout b/benchmark/test/reference/spmv.simple.stdout index 9601a15b331..38f2598c616 100644 --- a/benchmark/test/reference/spmv.simple.stdout +++ b/benchmark/test/reference/spmv.simple.stdout @@ -18,4 +18,4 @@ "spmv": "coo" } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout 
b/benchmark/test/reference/spmv_distributed.profile.stdout index 8de6a68ae8a..bbef87d0b89 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stdout +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -18,4 +18,4 @@ "spmv": "csr-csr" } } -] \ No newline at end of file +] diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout index f94e4b992a1..77bdef168d3 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stdout +++ b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -19,4 +19,4 @@ "spmv": "csr-csr" } } -] \ No newline at end of file +] diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 1a07818df1f..62c4293e7c0 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -92,7 +92,8 @@ def sanitize_json_text(input: str) -> List[str]: """ result = json.dumps(sanitize_json(json.loads(input)), indent=4) - return result.splitlines() + # json.dumps doesn't add a trailing newline + return result.splitlines() + [""] def sanitize_text(