Skip to content

Commit

Permalink
Improving Skeletonize and id
Browse files Browse the repository at this point in the history
  • Loading branch information
Chenhan Yu committed Jan 21, 2019
1 parent 9bd0edc commit c6c7396
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 87 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
deps/
build/
4 changes: 2 additions & 2 deletions example/run_gofmm_userdefined.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ then mpiexec="mpirun -n 4";
else mpiexec="";
fi
if [ "${HMLP_USE_MPI}" = true ];
then executable="./test_mpigofmm.x"
else executable="./test_gofmm.x";
then executable="./test_mpigofmm"
else executable="./test_gofmm";
fi
echo "@PRIM"
echo 'gofmm'
Expand Down
4 changes: 2 additions & 2 deletions example/sbatch_gofmm_userdefine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ hiddenlayer="512-512-512"
# ======= Do not change anything below this line ========
mpiexec="ibrun tacc_affinity"
#mpiexec="prun"
#executable="./test_mpigofmm.x"
#executable="./test_mpigofmm"
#executable="gdb -ex run --args ./test_mpigofmm.x"
executable="./test_gofmm.x"
executable="./test_gofmm"


echo "@PRIM"
Expand Down
44 changes: 13 additions & 31 deletions frame/primitives/lowrank.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,33 +201,15 @@ void id
jpvt.clear();
jpvt.resize( n, 0 );

/** Traditional pivoting QR (GEQP3) */
//#ifdef HMLP_USE_CUDA
// auto *dev = hmlp_get_device( 0 );
// cublasHandle_t &handle =
// reinterpret_cast<hmlp::gpu::Nvidia*>( dev )->gethandle( 0 );
// hmlp::xgeqp3
// (
// handle,
// m, n,
// A_tmp.data(), m,
// jpvt.data(),
// tau.data(),
// work.data(), lwork
// );
//#else
hmlp::xgeqp4
//hmlp::xgeqp3
(
m, n,
A_tmp.data(), m,
jpvt.data(),
tau.data(),
work.data(), lwork
);
//#endif
//printf( "end xgeqp3\n" );

/** Pivoting QR (GEQP3 or GEQP4). */
if ( true )
{
hmlp::xgeqp4( m, n, A_tmp.data(), m, jpvt.data(), tau.data(), work.data(), lwork );
}
else
{
hmlp::xgeqp3( m, n, A_tmp.data(), m, jpvt.data(), tau.data(), work.data(), lwork );
}
/** Shift jpvt from 1-base to 0-base index. */
for ( int j = 0; j < jpvt.size(); j ++ )
{
Expand Down Expand Up @@ -312,10 +294,10 @@ void id
}
else
{
proj[ j * s + i ] = A_tmp[ j * m + i ];
}
}
}
proj[ j * s + i ] = A_tmp[ j * m + i ];
}
}
}
}
else /** in the old version we use xgels, which is expensive */
{
Expand Down
38 changes: 7 additions & 31 deletions gofmm/gofmm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class CommandLineHelper
/** (Default) user-defined approximation tolerance and budget. */
double stol = 1E-3;
double budget = 0.0;
bool secure_accuracy = true;
bool secure_accuracy = false;
/** (Default) geometric-oblivious scheme. */
DistanceMetric metric = ANGLE_DISTANCE;

Expand Down Expand Up @@ -1197,48 +1197,28 @@ void Skeletonize( NODE *node )
{
/** Derive type T from NODE. */
using T = typename NODE::T;
/** Early return if we do not need to skeletonize. */
if ( !node->parent ) return;

/** Gather shared data and create reference. */
auto &K = *(node->setup->K);
auto &NN = *(node->setup->NN);
auto maxs = node->setup->MaximumRank();
auto stol = node->setup->Tolerance();
bool secure_accuracy = node->setup->SecureAccuracy();
bool use_adaptive_ranks = node->setup->UseAdaptiveRanks();

/** Gather per node data and create reference. */
auto &data = node->data;
auto &skels = data.skels;
auto &proj = data.proj;
auto &jpvt = data.jpvt;
auto &KIJ = data.KIJ;
auto &candidate_cols = data.candidate_cols;

/** Interpolative decomposition (ID). */
size_t N = K.col();
size_t m = KIJ.row();
size_t n = KIJ.col();
size_t q = node->n;

// /* IMTODO: change this decision to a function call. */
// if ( secure_accuracy )
// {
// if ( !node->isleaf && ( !node->lchild->data.is_compressed || !node->rchild->data.is_compressed ) )
// {
// skels.clear();
// proj.resize( 0, 0 );
// data.is_compressed = false;
// return;
// }
// }

/** Bill's l2 norm scaling factor. */
T scaled_stol = std::sqrt( (T)n / q ) * std::sqrt( (T)m / (N - q) ) * stol;
/** Account for uniform sampling. */
scaled_stol *= std::sqrt( (T)q / N );

/** TODO: check if this is needed? Account for uniform sampling. */
if ( true ) scaled_stol *= std::sqrt( (T)q / N );
/** Call adaptive interpolative decomposition primitive. */
lowrank::id( use_adaptive_ranks, secure_accuracy,
KIJ.row(), KIJ.col(), maxs, scaled_stol, KIJ, skels, proj, jpvt );
Expand All @@ -1249,13 +1229,6 @@ void Skeletonize( NODE *node )
{
skels[ i ] = candidate_cols[ skels[ i ] ];
}

// /** Depending on the flag, decide is_compressed or not. */
// data.is_compressed = ( secure_accuracy ) ? skels.size() : true;
//
// /** Sanity check. */
// if ( data.is_compressed ) assert( skels.size() && proj.size() && jpvt.size() );

}; /* end Skeletonize() */


Expand Down Expand Up @@ -1309,6 +1282,8 @@ class SkeletonizeTask : public Task

void Execute( Worker* user_worker )
{
/** Early return if we do not need to skeletonize. */
if ( !arg->parent ) return;
/* Check if we need to secure the accuracy? */
bool secure_accuracy = arg->setup->SecureAccuracy();
/* Gather per node data and create reference. */
Expand Down Expand Up @@ -3661,7 +3636,8 @@ void SelfTesting( TREE &tree, size_t ntest, size_t nrhs )
printf( "========================================================\n");
for ( size_t i = 0; i < ntest; i ++ )
{
size_t tar = i * n / ntest;
//size_t tar = i * n / ntest;
size_t tar = i * 1000;
Data<T> potentials;
/** ASKIT treecode with NN pruning. */
Evaluate( tree, tar, potentials, EVALUATE_OPTION_NEIGHBOR_PRUNING );
Expand Down
42 changes: 21 additions & 21 deletions gofmm/gofmm_mpi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3335,27 +3335,27 @@ class DistSkeletonKIJTask : public Task



/**
* @brief Skeletonization with interpolative decomposition.
*/
template<typename NODE, typename T>
void DistSkeletonize_v2( NODE *node )
{
/** Early return if we do not need to skeletonize. */
if ( !node->parent ) return;

/* Get the node communicator. */
mpi::Comm comm = arg->GetComm();

/* All children should have is_compressed properly set. Use a Reduce to AND all is_compressed. */



//if ( arg->GetCommRank() == 0 )
// {
// DistSkeletonize<NODE, T>( arg );
// }
};
///**
// * @brief Skeletonization with interpolative decomposition.
// */
//template<typename NODE, typename T>
//void DistSkeletonize_v2( NODE *node )
//{
// /** Early return if we do not need to skeletonize. */
// if ( !node->parent ) return;
//
// /* Get the node communicator. */
// mpi::Comm comm = arg->GetComm();
//
// /* All children should have is_compressed properly set. Use a Reduce to AND all is_compressed. */
//
//
//
// //if ( arg->GetCommRank() == 0 )
// // {
// // DistSkeletonize<NODE, T>( arg );
// // }
//};



Expand Down

0 comments on commit c6c7396

Please sign in to comment.