From 4edc7e1ed04027b2ffec9a6734eaa0e6597b9c4b Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Thu, 7 Jan 2021 18:38:18 +0100
Subject: [PATCH] add memory movement estimates to solvers

Co-authored-by: Hartwig Anzt
---
 core/solver/bicg.cpp     |  8 ++++++++
 core/solver/bicgstab.cpp | 11 +++++++++++
 core/solver/cg.cpp       |  8 ++++++++
 core/solver/cgs.cpp      |  9 +++++++++
 core/solver/fcg.cpp      |  8 ++++++++
 core/solver/gmres.cpp    | 11 +++++++++++
 core/solver/idr.cpp      | 16 ++++++++++++++++
 7 files changed, 71 insertions(+)

diff --git a/core/solver/bicg.cpp b/core/solver/bicg.cpp
index 411ae62957d..dbd2ceb334d 100644
--- a/core/solver/bicg.cpp
+++ b/core/solver/bicg.cpp
@@ -184,6 +184,14 @@ void Bicg::apply_impl(const LinOp *b, LinOp *x) const
 
     int iter = -1;
 
+    /* Memory movement summary:
+     * 27n * values + 2 * matrix/preconditioner storage
+     * 2x SpMV: 4n * values + 2 * storage
+     * 2x Preconditioner: 4n * values + 2 * storage
+     * 2x dot 4n
+     * 1x step 1 (axpys) 6n
+     * 1x step 2 (axpys) 9n
+     */
     while (true) {
         get_preconditioner()->apply(r.get(), z.get());
         conj_trans_preconditioner->apply(r2.get(), z2.get());
diff --git a/core/solver/bicgstab.cpp b/core/solver/bicgstab.cpp
index 583b1dbb5ad..c5216548bfc 100644
--- a/core/solver/bicgstab.cpp
+++ b/core/solver/bicgstab.cpp
@@ -136,6 +136,17 @@ void Bicgstab::apply_impl(const LinOp *b, LinOp *x) const
     rr->copy_from(r.get());
 
     int iter = -1;
+
+    /* Memory movement summary:
+     * 29n * values + 2 * matrix/preconditioner storage
+     * 2x SpMV: 4n * values + 2 * storage
+     * 2x Preconditioner: 4n * values + 2 * storage
+     * 3x dot 6n
+     * 1x norm2 n
+     * 1x step 1 (fused axpys) 4n
+     * 1x step 2 (axpy) 3n
+     * 1x step 3 (fused axpys) 7n
+     */
     while (true) {
         ++iter;
         this->template log(this, iter, r.get(),
diff --git a/core/solver/cg.cpp b/core/solver/cg.cpp
index 8b5aa5ea002..01865f9bf4c 100644
--- a/core/solver/cg.cpp
+++ b/core/solver/cg.cpp
@@ -128,6 +128,14 @@ void Cg::apply_impl(const LinOp *b, LinOp *x) const
         x, r.get());
 
     int iter = -1;
+    /* Memory movement summary:
+     * 17n * values + matrix/preconditioner storage
+     * 1x SpMV: 2n * values + storage
+     * 1x Preconditioner: 2n * values + storage
+     * 2x dot 4n
+     * 1x step 1 (axpy) 3n
+     * 1x step 2 (axpys) 6n
+     */
     while (true) {
         get_preconditioner()->apply(r.get(), z.get());
         r->compute_dot(z.get(), rho.get());
diff --git a/core/solver/cgs.cpp b/core/solver/cgs.cpp
index 5039bf464af..3ccd42073c3 100644
--- a/core/solver/cgs.cpp
+++ b/core/solver/cgs.cpp
@@ -138,6 +138,15 @@ void Cgs::apply_impl(const LinOp *b, LinOp *x) const
     r_tld->copy_from(r.get());
 
     int iter = 0;
+    /* Memory movement summary:
+     * 27n * values + 2 * matrix/preconditioner storage
+     * 2x SpMV: 4n * values + 2 * storage
+     * 2x Preconditioner: 4n * values + 2 * storage
+     * 2x dot 4n
+     * 1x step 1 (fused axpys) 5n
+     * 1x step 2 (fused axpys) 4n
+     * 1x step 3 (axpys) 6n
+     */
     while (true) {
         r->compute_dot(r_tld.get(), rho.get());
         // beta = rho / rho_prev
diff --git a/core/solver/fcg.cpp b/core/solver/fcg.cpp
index 1f3f8121120..3668ba9f0ed 100644
--- a/core/solver/fcg.cpp
+++ b/core/solver/fcg.cpp
@@ -130,6 +130,14 @@ void Fcg::apply_impl(const LinOp *b, LinOp *x) const
         x, r.get());
 
     int iter = -1;
+    /* Memory movement summary:
+     * 20n * values + matrix/preconditioner storage
+     * 1x SpMV: 2n * values + storage
+     * 1x Preconditioner: 2n * values + storage
+     * 3x dot 6n
+     * 1x step 1 (axpy) 3n
+     * 1x step 2 (fused axpys) 7n
+     */
     while (true) {
         get_preconditioner()->apply(r.get(), z.get());
         r->compute_dot(z.get(), rho.get());
diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp
index aa42de261e4..f997c490819 100644
--- a/core/solver/gmres.cpp
+++ b/core/solver/gmres.cpp
@@ -159,6 +159,17 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const
     auto after_preconditioner =
         matrix::Dense::create_with_config_of(dense_x);
 
+    /* Memory movement summary for iteration in Krylov subspace of dimension d
+     * (== krylov_dim / 2 on average), ignoring restarts:
+     * (5d+7)n * values + matrix/preconditioner storage
+     * 1x SpMV: 2n * values + storage
+     * 1x Preconditioner: 2n * values + storage
+     * MGS: (5d+3)n
+     *     dx dot 2dn
+     *     dx axpys 3dn
+     *     1x norm2 n
+     *     1x scal 2n
+     */
     while (true) {
         ++total_iter;
         this->template log(
diff --git a/core/solver/idr.cpp b/core/solver/idr.cpp
index ef49fca20da..a71bf521e3f 100644
--- a/core/solver/idr.cpp
+++ b/core/solver/idr.cpp
@@ -170,6 +170,22 @@ void Idr::iterate(const LinOp *b, LinOp *x) const
 
     int total_iter = -1;
 
+    /* Memory movement summary for iteration with subspace dimension d
+     * (3d²+12(d+1))n * values + d * matrix/preconditioner storage
+     * dx SpMV: 2dn * values + d * storage
+     * dx Preconditioner: 2dn * values + d * storage
+     * 1x multidot (gemm) (d+1)n
+     * dx step 1 (fused axpys) d(d/2+2)n on average
+     * dx step 2 (fused axpys) d(d/2+2)n on average
+     * dx step 3: d(2d+4)n on average
+     *     1x orthogonalize g+u (3k+2)n in kth iteration
+     *     1x multidot (gemm) kn in (d-k)th iteration
+     * 2x axpy 6n
+     * 2x dot 4n
+     * 1x norm2 n
+     * 1x scale 2n
+     * 2x axpy 4n
+     */
     while (true) {
         ++total_iter;
         this->template log(