diff --git a/benchmark/test/CMakeLists.txt b/benchmark/test/CMakeLists.txt index e1aab6dd75d..1cd589927fa 100644 --- a/benchmark/test/CMakeLists.txt +++ b/benchmark/test/CMakeLists.txt @@ -22,5 +22,7 @@ add_benchmark_test(solver) add_benchmark_test(sparse_blas) add_benchmark_test(spmv) if (GINKGO_BUILD_MPI) + add_benchmark_test(multi_vector_distributed) + add_benchmark_test(spmv_distributed) add_benchmark_test(solver_distributed) -endif() +endif() \ No newline at end of file diff --git a/benchmark/test/input.distributed_mtx.json b/benchmark/test/input.distributed_mtx.json new file mode 100644 index 00000000000..aca115179e6 --- /dev/null +++ b/benchmark/test/input.distributed_mtx.json @@ -0,0 +1,7 @@ +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil" + } +] \ No newline at end of file diff --git a/benchmark/test/multi_vector_distributed.py b/benchmark/test/multi_vector_distributed.py new file mode 100644 index 00000000000..1e0c4c8adf5 --- /dev/null +++ b/benchmark/test/multi_vector_distributed.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +import test_framework + +# check that all input modes work: +# parameter +test_framework.compare_output_distributed( + ["-input", '[{"n": 100}]'], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + num_procs=3, +) + +# stdin +test_framework.compare_output_distributed( + [], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) + +# file +test_framework.compare_output_distributed( + ["-input", str(test_framework.sourcepath / "input.blas.json")], + expected_stdout="multi_vector_distributed.simple.stdout", + expected_stderr="multi_vector_distributed.simple.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) + +# profiler annotations +test_framework.compare_output_distributed( + ["-input", '[{"n": 100}]', "-profile", "-profiler_hook", "debug"], + expected_stdout="multi_vector_distributed.profile.stdout", + expected_stderr="multi_vector_distributed.profile.stderr", + stdin='[{"n": 100}]', + num_procs=3, +) diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index b64f4321287..1313c85e462 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -4,11 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The operations are copy,axpy,scal -Running test case -{ - "n": 100, - "blas": {} -} +Running test case n = 100 DEBUG: begin n = 100 Running blas: copy DEBUG: begin copy diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr index f41b25c6ee1..966ed597166 100644 --- a/benchmark/test/reference/blas.simple.stderr +++ b/benchmark/test/reference/blas.simple.stderr @@ -4,11 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are copy,axpy,scal -Running test case -{ - "n": 100, - "blas": {} -} +Running test case n = 100 Running blas: copy Running blas: axpy Running blas: scal diff --git a/benchmark/test/reference/conversion.all.stderr b/benchmark/test/reference/conversion.all.stderr index 1d5df7477ba..77ff50a1b89 100644 --- a/benchmark/test/reference/conversion.all.stderr +++ b/benchmark/test/reference/conversion.all.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr,ell,sellp,hybrid -Running test case -{ - "size": 100, - "stencil": "7pt", - "conversion": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/conversion.profile.stderr b/benchmark/test/reference/conversion.profile.stderr index 089e6be02f9..6078dd3db2f 100644 --- a/benchmark/test/reference/conversion.profile.stderr +++ b/benchmark/test/reference/conversion.profile.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case -{ - "size": 100, - "stencil": "7pt", - "conversion": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 DEBUG: begin stencil(100,7pt) Running conversion: coo-read diff --git a/benchmark/test/reference/conversion.simple.stderr b/benchmark/test/reference/conversion.simple.stderr index a814dba6888..9b51effac09 100644 --- a/benchmark/test/reference/conversion.simple.stderr +++ b/benchmark/test/reference/conversion.simple.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo,csr -Running test case -{ - "size": 100, - "stencil": "7pt", - "conversion": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running conversion: coo-read Running conversion: coo-csr diff --git a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index e583a1411a8..1daab773a38 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -5,16 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "comm_pattern": "stencil", - "optimal": { - "spmv": "csr-csr" - }, - "solver": {} -} +Running test case stencil(100,7pt,stencil) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin partition::build_ranges_from_global_size diff --git a/benchmark/test/reference/distributed_solver.simple.stderr b/benchmark/test/reference/distributed_solver.simple.stderr index 9feb7fa9522..607081a3949 100644 --- a/benchmark/test/reference/distributed_solver.simple.stderr +++ b/benchmark/test/reference/distributed_solver.simple.stderr @@ -5,15 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "comm_pattern": "stencil", - "optimal": { - "spmv": "csr-csr" - }, - "solver": {} -} +Running test case stencil(100,7pt,stencil) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr index 75a7cca709f..d02edbc44da 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -1,9 +1,4 @@ This is Ginkgo 1.7.0 (develop) running with core module 1.7.0 (develop) -Running test case -{ - "size": 100, - "stencil": "7pt", - "problem": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr new file mode 100644 index 00000000000..a77484daacb --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -0,0 +1,254 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scal +Running test case n = 100 +DEBUG: begin n = 100 + Running blas: copy +DEBUG: begin copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end copy + Running blas: axpy +DEBUG: begin axpy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end axpy + Running blas: scal +DEBUG: begin scal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::scale +DEBUG: end dense::scale +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end scal +DEBUG: end n = 100 diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stdout b/benchmark/test/reference/multi_vector_distributed.profile.stdout new file mode 100644 index 00000000000..3a2e7e54f80 --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.profile.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 1, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stderr b/benchmark/test/reference/multi_vector_distributed.simple.stderr new file mode 100644 index 00000000000..966ed597166 --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.simple.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scal +Running test case n = 100 + Running blas: copy + Running blas: axpy + Running blas: scal diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stdout b/benchmark/test/reference/multi_vector_distributed.simple.stdout new file mode 100644 index 00000000000..08e692727fe --- /dev/null +++ b/benchmark/test/reference/multi_vector_distributed.simple.stdout @@ -0,0 +1,29 @@ + +[ + { + "n": 100, + "blas": { + "copy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "axpy": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + }, + "scal": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "completed": true + } + } + } +] diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index c215b22c925..def3a83993d 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case -{ - "size": 100, - "stencil": "7pt", - "preconditioner": {} -} +Running test case stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr index 07d2cca6704..0090e180d2b 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -4,11 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 Running with preconditioners: none -Running test case -{ - "size": 100, - "stencil": "7pt", - "preconditioner": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running preconditioner: none diff --git a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index 0c3f7060796..43ff852f68e 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -5,15 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "optimal": { - "spmv": "csr" - }, - "solver": {} -} +Running test case stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array diff --git a/benchmark/test/reference/solver.simple.stderr b/benchmark/test/reference/solver.simple.stderr index c5e4267a6bd..659dd026588 100644 --- a/benchmark/test/reference/solver.simple.stderr +++ b/benchmark/test/reference/solver.simple.stderr @@ -5,14 +5,6 @@ Running with 2 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "optimal": { - "spmv": "csr" - }, - "solver": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125) Running solver: cg diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index d1434dad146..c47ce2a515b 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -4,12 +4,7 @@ Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case -{ - "size": 100, - "stencil": "7pt", - "sparse_blas": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 DEBUG: begin allocate DEBUG: end allocate diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr index 452374a9268..1f2bb34809f 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -4,11 +4,6 @@ Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The operations are transpose -Running test case -{ - "size": 100, - "stencil": "7pt", - "sparse_blas": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running sparse_blas: transpose diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 09a10b725ea..4ff0125782f 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -5,12 +5,7 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "spmv": {} -} +Running test case stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index a910512ff31..9d5047febb6 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -5,11 +5,6 @@ Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 The formats are coo The number of right hand sides is 1 -Running test case -{ - "size": 100, - "stencil": "7pt", - "spmv": {} -} +Running test case stencil(100,7pt) Matrix is of size (125, 125), 725 Running spmv: coo diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr new file mode 100644 index 00000000000..95a07c8275c --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -0,0 +1,446 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +Running test case stencil(100,7pt,stencil) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +Matrix is of size (81, 81), 144 +DEBUG: begin stencil(100,7pt,stencil) + Running spmv: csr-csr +DEBUG: begin csr-csr +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy() +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy() +DEBUG: begin apply() +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply() +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply() +DEBUG: begin advanced_apply() +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply() +DEBUG: end apply() +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end csr-csr +DEBUG: end stencil(100,7pt,stencil) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout new file mode 100644 index 00000000000..ebacddb887c --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -0,0 +1,22 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 6420, + "time": 1.0, + "repetitions": 1, + "completed": true + } + }, + "rows": 81, + "cols": 81, + "nonzeros": 144, + "optimal": { + "spmv": "csr-csr" + } + } +] diff --git a/benchmark/test/reference/spmv_distributed.simple.stderr b/benchmark/test/reference/spmv_distributed.simple.stderr new file mode 100644 index 00000000000..0df742d5b9b --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.simple.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +Running test case stencil(100,7pt,stencil) +Matrix is of size (81, 81), 144 + Running spmv: csr-csr diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout new file mode 100644 index 00000000000..64203476f91 --- /dev/null +++ b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -0,0 +1,23 @@ + +[ + { + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": { + "csr-csr": { + "storage": 6420, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "rows": 81, + "cols": 81, + "nonzeros": 144, + "optimal": { + "spmv": "csr-csr" + } + } +] diff --git a/benchmark/test/spmv_distributed.py b/benchmark/test/spmv_distributed.py new file mode 100644 index 00000000000..356db48459e --- /dev/null +++ b/benchmark/test/spmv_distributed.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +import test_framework + +# check that all input modes work: +# parameter +test_framework.compare_output_distributed( + ["-input", '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]'], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, +) + +# stdin +test_framework.compare_output_distributed( + [], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, + stdin='[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', +) + +# input file +test_framework.compare_output_distributed( + ["-input", str(test_framework.sourcepath / "input.distributed_mtx.json")], + expected_stdout="spmv_distributed.simple.stdout", + expected_stderr="spmv_distributed.simple.stderr", + num_procs=3, +) + +# profiler annotations +test_framework.compare_output_distributed( + [ + "-input", + '[{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}]', + "-profile", + "-profiler_hook", + "debug", + ], + expected_stdout="spmv_distributed.profile.stdout", + expected_stderr="spmv_distributed.profile.stderr", + num_procs=3, +) diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in index 6037f8c594e..5267b798648 100644 --- a/benchmark/test/test_framework.py.in +++ b/benchmark/test/test_framework.py.in @@ -99,8 +99,7 @@ def sanitize_json_in_text(lines: List[str]) -> List[str]: for begin, end, do_sanitize in combined_pairs ] reconstructed = [ - json.dumps(sanitize_json(json.loads(t)), - indent=4) if do_sanitize else t + json.dumps(sanitize_json(json.loads(t)), indent=4) if do_sanitize else t for t, do_sanitize in texts ] return "\n".join(reconstructed).split("\n") @@ -215,8 +214,7 @@ def compare_output_impl( print("FAIL: stdout differs") print( "\n".join( - difflib.unified_diff( - expected_stdout_processed, result_stdout_processed) + difflib.unified_diff(expected_stdout_processed, result_stdout_processed) ) ) failed = True @@ -224,8 +222,7 @@ def compare_output_impl( print("FAIL: stderr differs") print( "\n".join( - difflib.unified_diff( - expected_stderr_processed, result_stderr_processed) + difflib.unified_diff(expected_stderr_processed, result_stderr_processed) ) ) failed = True @@ -249,7 +246,7 @@ def compare_output( def compare_output_distributed( args, expected_stdout, expected_stderr, num_procs, stdin="" ): - compare_output( + compare_output_impl( args, expected_stdout, expected_stderr, diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 8173f66ad3f..5d28b00838a 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -226,45 +226,6 @@ std::shared_ptr create_profiler_hook( } -struct owning_profiling_scope_guard { - std::string name; - gko::log::profiling_scope_guard guard; - - owning_profiling_scope_guard() = default; - - owning_profiling_scope_guard(std::string name_, - gko::log::ProfilerHook* profiler_hook) - : name(std::move(name_)), guard{profiler_hook->user_range(name.c_str())} - {} -}; - - -struct annotate_functor { - owning_profiling_scope_guard operator()(std::string name) const - { - if (profiler_hook) { - return owning_profiling_scope_guard{std::move(name), - profiler_hook.get()}; - } - return {}; - } - - gko::log::profiling_scope_guard operator()(const char* name) const - { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - } - - annotate_functor(std::shared_ptr profiler_hook) - : profiler_hook{std::move(profiler_hook)} - {} - - std::shared_ptr profiler_hook; -}; - - // Returns a random number engine std::default_random_engine& get_engine() { diff --git a/benchmark/utils/runner.hpp b/benchmark/utils/runner.hpp index 3520f7299ee..661c403706f 100644 --- a/benchmark/utils/runner.hpp +++ b/benchmark/utils/runner.hpp @@ -153,13 +153,13 @@ void run_test_cases(const Benchmark& benchmark, if (!test_case.contains(benchmark.get_name())) { test_case[benchmark.get_name()] = json::object(); } + auto test_case_desc = benchmark.describe_config(test_case); if (benchmark.should_print()) { - std::clog << "Running test case\n" - << std::setw(4) << test_case << std::endl; + std::clog << "Running test case " << test_case_desc + << std::endl; } auto test_case_state = benchmark.setup(exec, test_case); - auto test_case_str = benchmark.describe_config(test_case); - auto test_case_range = annotate(test_case_str.c_str()); + auto test_case_range = annotate(test_case_desc.c_str()); auto& benchmark_case = test_case[benchmark.get_name()]; for (const auto& operation_name : benchmark.get_operations()) { if (benchmark_case.contains(operation_name) && @@ -183,7 +183,7 @@ void run_test_cases(const Benchmark& benchmark, gko::name_demangling::get_dynamic_type(e); operation_case["error"] = e.what(); std::cerr << "Error when processing test case\n" - << std::setw(4) << test_case << "\n" + << test_case_desc << "\n" << "what(): " << e.what() << std::endl; }