diff --git a/.gitignore b/.gitignore index 567609b..4c5196f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ +deps/ build/ diff --git a/example/run_gofmm_userdefined.sh b/example/run_gofmm_userdefined.sh index c421523..454bb31 100755 --- a/example/run_gofmm_userdefined.sh +++ b/example/run_gofmm_userdefined.sh @@ -39,8 +39,8 @@ then mpiexec="mpirun -n 4"; else mpiexec=""; fi if [ "${HMLP_USE_MPI}" = true ]; -then executable="./test_mpigofmm.x" -else executable="./test_gofmm.x"; +then executable="./test_mpigofmm" +else executable="./test_gofmm"; fi echo "@PRIM" echo 'gofmm' diff --git a/example/sbatch_gofmm_userdefine.sh b/example/sbatch_gofmm_userdefine.sh index c36560f..46689db 100755 --- a/example/sbatch_gofmm_userdefine.sh +++ b/example/sbatch_gofmm_userdefine.sh @@ -80,9 +80,9 @@ hiddenlayer="512-512-512" # ======= Do not change anything below this line ======== mpiexec="ibrun tacc_affinity" #mpiexec="prun" -#executable="./test_mpigofmm.x" +#executable="./test_mpigofmm" #executable="gdb -ex run --args ./test_mpigofmm.x" -executable="./test_gofmm.x" +executable="./test_gofmm" echo "@PRIM" diff --git a/frame/primitives/lowrank.hpp b/frame/primitives/lowrank.hpp index 11f86e2..daf0e6f 100644 --- a/frame/primitives/lowrank.hpp +++ b/frame/primitives/lowrank.hpp @@ -201,33 +201,15 @@ void id jpvt.clear(); jpvt.resize( n, 0 ); - /** Traditional pivoting QR (GEQP3) */ -//#ifdef HMLP_USE_CUDA -// auto *dev = hmlp_get_device( 0 ); -// cublasHandle_t &handle = -// reinterpret_cast( dev )->gethandle( 0 ); -// hmlp::xgeqp3 -// ( -// handle, -// m, n, -// A_tmp.data(), m, -// jpvt.data(), -// tau.data(), -// work.data(), lwork -// ); -//#else - hmlp::xgeqp4 - //hmlp::xgeqp3 - ( - m, n, - A_tmp.data(), m, - jpvt.data(), - tau.data(), - work.data(), lwork - ); -//#endif - //printf( "end xgeqp3\n" ); - + /** Pivoting QR (GEQP3 or GEQP4). */ + if ( true ) + { + hmlp::xgeqp4( m, n, A_tmp.data(), m, jpvt.data(), tau.data(), work.data(), lwork ); + } + else + { + hmlp::xgeqp3( m, n, A_tmp.data(), m, jpvt.data(), tau.data(), work.data(), lwork ); + } /** Shift jpvt from 1-base to 0-base index. */ for ( int j = 0; j < jpvt.size(); j ++ ) { @@ -312,10 +294,10 @@ void id } else { - proj[ j * s + i ] = A_tmp[ j * m + i ]; - } - } - } + proj[ j * s + i ] = A_tmp[ j * m + i ]; + } + } + } } else /** in the old version we use xgels, which is expensive */ { diff --git a/gofmm/gofmm.hpp b/gofmm/gofmm.hpp index edf02f7..802208b 100644 --- a/gofmm/gofmm.hpp +++ b/gofmm/gofmm.hpp @@ -189,7 +189,7 @@ class CommandLineHelper /** (Default) user-defined approximation toleratnce and budget. */ double stol = 1E-3; double budget = 0.0; - bool secure_accuracy = true; + bool secure_accuracy = false; /** (Default) geometric-oblivious scheme. */ DistanceMetric metric = ANGLE_DISTANCE; @@ -1197,17 +1197,12 @@ void Skeletonize( NODE *node ) { /** Derive type T from NODE. */ using T = typename NODE::T; - /** Early return if we do not need to skeletonize. */ - if ( !node->parent ) return; - /** Gather shared data and create reference. */ auto &K = *(node->setup->K); - auto &NN = *(node->setup->NN); auto maxs = node->setup->MaximumRank(); auto stol = node->setup->Tolerance(); bool secure_accuracy = node->setup->SecureAccuracy(); bool use_adaptive_ranks = node->setup->UseAdaptiveRanks(); - /** Gather per node data and create reference. */ auto &data = node->data; auto &skels = data.skels; @@ -1215,30 +1210,15 @@ void Skeletonize( NODE *node ) auto &jpvt = data.jpvt; auto &KIJ = data.KIJ; auto &candidate_cols = data.candidate_cols; - /** Interpolative decomposition (ID). */ size_t N = K.col(); size_t m = KIJ.row(); size_t n = KIJ.col(); size_t q = node->n; - -// /* IMTODO: change this decision to a function call. */ -// if ( secure_accuracy ) -// { -// if ( !node->isleaf && ( !node->lchild->data.is_compressed || !node->rchild->data.is_compressed ) ) -// { -// skels.clear(); -// proj.resize( 0, 0 ); -// data.is_compressed = false; -// return; -// } -// } - /** Bill's l2 norm scaling factor. */ T scaled_stol = std::sqrt( (T)n / q ) * std::sqrt( (T)m / (N - q) ) * stol; - /** Account for uniform sampling. */ - scaled_stol *= std::sqrt( (T)q / N ); - + /** TODO: check if this is needed? Account for uniform sampling. */ + if ( true ) scaled_stol *= std::sqrt( (T)q / N ); /** Call adaptive interpolative decomposition primitive. */ lowrank::id( use_adaptive_ranks, secure_accuracy, KIJ.row(), KIJ.col(), maxs, scaled_stol, KIJ, skels, proj, jpvt ); @@ -1249,13 +1229,6 @@ void Skeletonize( NODE *node ) { skels[ i ] = candidate_cols[ skels[ i ] ]; } - -// /** Depending on the flag, decide is_compressed or not. */ -// data.is_compressed = ( secure_accuracy ) ? skels.size() : true; -// -// /** Sanity check. */ -// if ( data.is_compressed ) assert( skels.size() && proj.size() && jpvt.size() ); - }; /* end Skeletonize() */ @@ -1309,6 +1282,8 @@ class SkeletonizeTask : public Task void Execute( Worker* user_worker ) { + /** Early return if we do not need to skeletonize. */ + if ( !arg->parent ) return; /* Check if we need to secure the accuracy? */ bool secure_accuracy = arg->setup->SecureAccuracy(); /* Gather per node data and create reference. */ @@ -3661,7 +3636,8 @@ void SelfTesting( TREE &tree, size_t ntest, size_t nrhs ) printf( "========================================================\n"); for ( size_t i = 0; i < ntest; i ++ ) { - size_t tar = i * n / ntest; + //size_t tar = i * n / ntest; + size_t tar = i * 1000; Data potentials; /** ASKIT treecode with NN pruning. */ Evaluate( tree, tar, potentials, EVALUATE_OPTION_NEIGHBOR_PRUNING ); diff --git a/gofmm/gofmm_mpi.hpp b/gofmm/gofmm_mpi.hpp index 8fa20b5..c1e96d2 100644 --- a/gofmm/gofmm_mpi.hpp +++ b/gofmm/gofmm_mpi.hpp @@ -3335,27 +3335,27 @@ class DistSkeletonKIJTask : public Task -/** - * @brief Skeletonization with interpolative decomposition. - */ -template -void DistSkeletonize_v2( NODE *node ) -{ - /** Early return if we do not need to skeletonize. */ - if ( !node->parent ) return; - - /* Get the node communicator. */ - mpi::Comm comm = arg->GetComm(); - - /* All children should have is_compressed properly set. Use a Reduce to AND all is_compressed. */ - - - - //if ( arg->GetCommRank() == 0 ) - // { - // DistSkeletonize( arg ); - // } -}; +///** +// * @brief Skeletonization with interpolative decomposition. +// */ +//template +//void DistSkeletonize_v2( NODE *node ) +//{ +// /** Early return if we do not need to skeletonize. */ +// if ( !node->parent ) return; +// +// /* Get the node communicator. */ +// mpi::Comm comm = arg->GetComm(); +// +// /* All children should have is_compressed properly set. Use a Reduce to AND all is_compressed. */ +// +// +// +// //if ( arg->GetCommRank() == 0 ) +// // { +// // DistSkeletonize( arg ); +// // } +//};