Skip to content

Commit

Permalink
Merge pull request #258 from electronic-structure/develop
Browse files: browse the repository at this point in the history
Develop
  • Loading branch information
electronic-structure authored Sep 12, 2018
2 parents fc1c648 + 1db9cd5 commit 765d1ce
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
8 changes: 4 additions & 4 deletions src/Hamiltonian/set_lapw_h_o.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,10 @@ inline void Hamiltonian::set_fv_h_o<GPU, electronic_structure_method_t::full_pot
acc::copyout(o__.at<CPU>(), o__.ld(), o__.at<GPU>(), o__.ld(), kp__->num_gkvec_row(), kp__->num_gkvec_col());

double tval = t1.stop();
if (kp__->comm().rank() == 0 && ctx_.control().print_performance_) {
DUMP("effective zgemm performance: %12.6f GFlops",
2 * 8e-9 * kp__->num_gkvec() * kp__->num_gkvec() * unit_cell_.mt_aw_basis_size() / tval);
}
//if (kp__->comm().rank() == 0 && ctx_.control().print_performance_) {
// DUMP("effective zgemm performance: %12.6f GFlops",
// 2 * 8e-9 * kp__->num_gkvec() * kp__->num_gkvec() * unit_cell_.mt_aw_basis_size() / tval);
//}

/* add interstitial contribution */
set_fv_h_o_it(kp__, h__, o__);
Expand Down
12 changes: 6 additions & 6 deletions src/Unit_cell/atom.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,15 +262,15 @@ class Atom
t1.stop();

result.allocate(memory_t::device);
utils::timer t2("sirius::Atom::generate_radial_integrals|inner");
//utils::timer t2("sirius::Atom::generate_radial_integrals|inner");
spline_inner_product_gpu_v3(idx_ri.at<GPU>(), (int)idx_ri.size(1), nmtp, rgrid.x().at<GPU>(),
rgrid.dx().at<GPU>(), rf_coef.at<GPU>(), vrf_coef.at<GPU>(), result.at<GPU>());
acc::sync();
if (type().parameters().control().print_performance_) {
double tval = t2.stop();
DUMP("spline GPU integration performance: %12.6f GFlops",
1e-9 * double(idx_ri.size(1)) * nmtp * 85 / tval);
}
//if (type().parameters().control().print_performance_) {
// double tval = t2.stop();
// DUMP("spline GPU integration performance: %12.6f GFlops",
// 1e-9 * double(idx_ri.size(1)) * nmtp * 85 / tval);
//}
result.copy<memory_t::device, memory_t::host>();
result.deallocate(memory_t::device);
#else
Expand Down

0 comments on commit 765d1ce

Please sign in to comment.