Skip to content

Commit

Permalink
Merge pull request #260 from toxa81/develop
Browse files Browse the repository at this point in the history
fixes
  • Loading branch information
toxa81 authored Sep 17, 2018
2 parents ed74c33 + 10122a6 commit 383c8dd
Show file tree
Hide file tree
Showing 10 changed files with 213 additions and 82 deletions.
4 changes: 2 additions & 2 deletions apps/unit_tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ INCLUDE = -I./../../src
.cpp.o:
$(CXX) $(CXX_OPT) $(INCLUDE) $< $(LIB_SIRIUS) $(LIBS) -o $@

all: test_init test_sht test_fft_correctness test_fft_real test_spline test_rot_ylm test_linalg test_wf_ortho test_serialize
all: test_init test_sht test_fft_correctness test_fft_real test_spline test_rot_ylm test_linalg test_wf_ortho test_serialize test_mempool

%: %.cpp $(LIB_SIRIUS)
$(CXX) $(CXX_OPT) $(INCLUDE) $< $(LIB_SIRIUS) $(LIBS) -o $@

clean:
rm -rf *.o test_init test_sht test_fft_correctness test_fft_real test_spline test_rot_ylm test_linalg test_wf_ortho test_serialize *.dSYM
rm -rf *.o test_init test_sht test_fft_correctness test_fft_real test_spline test_rot_ylm test_linalg test_wf_ortho test_serialize test_mempool *.dSYM
91 changes: 91 additions & 0 deletions apps/unit_tests/test_mempool.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#include <sirius.h>

using namespace sirius;

/// Smoke test: create a memory pool and let it go out of scope without using it.
void test1()
{
    memory_pool pool;
}

void test2()
{
memory_pool mp;
mp.allocate<double_complex, memory_t::host>(1024);
mp.reset<memory_t::host>();
}

void test3()
{
memory_pool mp;
mp.allocate<double_complex, memory_t::host>(1024);
mp.allocate<double_complex, memory_t::host>(2024);
mp.allocate<double_complex, memory_t::host>(3024);
mp.reset<memory_t::host>();
}

void test4()
{
memory_pool mp;
mp.allocate<double_complex, memory_t::host>(1024);
mp.reset<memory_t::host>();
mp.allocate<double_complex, memory_t::host>(1024);
mp.allocate<double_complex, memory_t::host>(1024);
mp.reset<memory_t::host>();
mp.allocate<double_complex, memory_t::host>(1024);
mp.allocate<double_complex, memory_t::host>(1024);
mp.allocate<double_complex, memory_t::host>(1024);
mp.reset<memory_t::host>();
}

void test5()
{
memory_pool mp;

for (int k = 0; k < 2; k++) {
std::vector<double*> vp;
for (size_t i = 1; i < 20; i++) {
size_t sz = 1 << i;
double* ptr = mp.allocate<double, memory_t::host>(sz);
ptr[0] = 0;
ptr[sz - 1] = 0;
vp.push_back(ptr);
}
for (auto& e: vp) {
mp.free<memory_t::host>(e);
}
}
}

/// Run all memory pool test cases in sequence.
/** \return Always 0: the individual cases return nothing, so this function has no failure
 *  status of its own to report. */
int run_test()
{
    /* test cases in the order of execution */
    void (*const test_cases[])() = {test1, test2, test3, test4, test5};

    for (auto test_case : test_cases) {
        test_case();
    }

    return 0;
}

/// Entry point of the memory pool unit test.
/** Parses the command line, prints the usage message and exits if "help" was requested,
 *  otherwise runs the tests between sirius::initialize() and sirius::finalize() and prints
 *  a colored "OK" / "Failed" verdict.
 *
 *  \param [in] argn Number of command line arguments.
 *  \param [in] argv Command line arguments.
 *  \return 0 if help was printed or all tests passed, otherwise the non-zero status of run_test().
 */
int main(int argn, char** argv)
{
    cmd_args args;

    args.parse_args(argn, argv);
    if (args.exist("help")) {
        printf("Usage: %s [options]\n", argv[0]);
        args.print_help();
        return 0;
    }

    sirius::initialize(1);
    printf("%-30s", "testing memory pool: ");
    int result = run_test();
    if (result) {
        printf("\x1b[31m" "Failed" "\x1b[0m" "\n");
    } else {
        printf("\x1b[32m" "OK" "\x1b[0m" "\n");
    }
    sirius::finalize();

    /* propagate the test status: the driver script (unit_tests.x) decides pass/fail from the
     * exit code of the executable, so returning 0 unconditionally would hide failures */
    return result;
}
4 changes: 2 additions & 2 deletions apps/unit_tests/test_spline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ void test_spline_5()
}
}
double tval = t.stop();
// DUMP("inner product time: %12.6f", tval);
// DUMP("performance: %12.6f GFlops", 1e-9 * n * n * N * 85 / tval);
printf("inner product time: %12.6f", tval);
printf("performance: %12.6f GFlops", 1e-9 * n * n * N * 85 / tval);
}

void test_spline_6()
Expand Down
4 changes: 2 additions & 2 deletions apps/unit_tests/unit_tests.x
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#!/bin/bash

tests='test_init test_sht test_fft_correctness test_fft_real test_spline test_rot_ylm test_linalg test_wf_ortho test_serialize'
tests='test_init test_sht test_fft_correctness test_fft_real test_spline test_rot_ylm test_linalg test_wf_ortho test_serialize test_mempool'

for test in $tests; do
echo "running '${test}'"
./${test}
err=$?

if [ ${err} == 0 ]; then
echo "OK"
echo "'${test}' passed"
else
echo "'${test}' failed"
exit ${err}
Expand Down
14 changes: 8 additions & 6 deletions src/SDDK/memory_pool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,16 @@ class memory_pool
if (mem_type_entry != memory_blocks_.end()) {
/* iterate over memory blocks */
auto it = mem_type_entry->second.begin();
while(it != mem_type_entry->second.end()) {
if (it->used_) {
auto it_prev = it++;
remove_block<mem_type>(it_prev);
} else {
it++;
while (it != mem_type_entry->second.end()) {
auto it1 = it++;
if (it1->used_) {
remove_block<mem_type>(it1);
}
}
it = mem_type_entry->second.begin();
if (mem_type_entry->second.size() != 1 || it->used_ || (it->size_ != it->buf_->size())) {
TERMINATE("error in memory_pool::reset()");
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/SDDK/profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

namespace sddk {

/// Simple profiler and function call tracker.
class profiler
{
private:
Expand Down
32 changes: 12 additions & 20 deletions src/SDDK/wf_inner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

/** \file wf_inner.hpp
*
*
* \brief Contains implementation of inner product for wave-functions.
*/

Expand Down Expand Up @@ -60,20 +60,14 @@ inline void inner(device_t pu__,
PROFILE("sddk::Wave_functions::inner");

static_assert(std::is_same<T, double>::value || std::is_same<T, double_complex>::value, "wrong type");

//assert(&bra__.comm() == &ket__.comm());
//assert(bra__.pw_coeffs().num_rows_loc() == ket__.pw_coeffs().num_rows_loc());
//if (bra__.has_mt()) {
// assert(bra__.mt_coeffs().num_rows_loc() == ket__.mt_coeffs().num_rows_loc());
//}

auto& comm = bra__.comm();
#ifdef __GPU

#ifdef __GPU
if (pu__ == GPU) {
acc::set_device();
}
#endif
#endif

const char* sddk_pp_raw = std::getenv("SDDK_PRINT_PERFORMANCE");
int sddk_pp = (sddk_pp_raw == NULL) ? 0 : std::atoi(sddk_pp_raw);
Expand All @@ -94,8 +88,6 @@ inline void inner(device_t pu__,
}
double time = -omp_get_wtime();

//result__.zero(i0__, j0__, m__, n__);

T alpha = (std::is_same<T, double_complex>::value) ? 1 : 2;
T beta = 0;

Expand Down Expand Up @@ -291,7 +283,7 @@ inline void inner(device_t pu__,
std::array<std::array<int, 4>, 2> dims;

if (pu__ == GPU) {
#ifdef __GPU
#ifdef __GPU
/* state of the buffers:
* state = 0: buffer is free
* state = 1: buffer stores result of local zgemm */
Expand All @@ -312,10 +304,10 @@ inline void inner(device_t pu__,

#pragma omp parallel num_threads(2) shared(buf_state)
{
if (omp_get_thread_num() == 0) {
/* thread 0 spawns as many threads as possible */
omp_set_num_threads(nt - 1);
}
//if (omp_get_thread_num() == 0) {
// /* thread 0 spawns as many threads as possible */
// omp_set_num_threads(nt - 1);
//}

/* this rotates the buffers and the CUDA stream numbers in a round robin way */
int s{0};
Expand Down Expand Up @@ -367,7 +359,7 @@ inline void inner(device_t pu__,
/* store panel: go over the elements of the window and add the elements
* to the resulting array; the .add() method skips the elements that are
* not part of the local result matrix. */
#pragma omp parallel for
#pragma omp parallel for num_threads(nt - 1)
for (int jcol = 0; jcol < ncol; jcol++) {
for (int irow = 0; irow < nrow; irow++) {
/* .add() method takes the global (row, column) indices */
Expand All @@ -385,8 +377,8 @@ inline void inner(device_t pu__,
}
}
omp_set_nested(0);
omp_set_num_threads(nt);
#endif
//omp_set_num_threads(nt);
#endif
}

if (pu__ == CPU) {
Expand Down
50 changes: 43 additions & 7 deletions src/input.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
/** \file input.h
*
* \brief Contains input parameters structures.
*
* \todo Some of the parameters belong to the SCF ground state mini-app. The mini-app should parse these values itself.
*/

#ifndef __INPUT_H__
Expand Down Expand Up @@ -353,14 +355,32 @@ struct Control_input

/// Level of internal verification.
int verification_{0};

/// Number of eigen-values that are printed to the standard output.
int num_bands_to_print_{10};

/// If true then performance of some compute-intensive kernels will be printed to the standard output.
bool print_performance_{false};

/// If true then memory usage will be printed to the standard output.
bool print_memory_usage_{false};

/// If true then the checksums of some arrays will be printed (useful during debug).
bool print_checksum_{false};

/// If true then the hashsums of some arrays will be printed.
bool print_hash_{false};

/// If true then the stress tensor components are printed at the end of SCF run.
bool print_stress_{false};

/// If true then the atomic forces are printed at the end of SCF run.
bool print_forces_{false};

/// If true then the timer statistics is printed at the end of SCF run.
bool print_timers_{true};

/// If true then the list of nearest neighbours for each atom is printed to the standard output.
bool print_neighbors_{false};

void read(json const& parser)
Expand Down Expand Up @@ -396,14 +416,21 @@ struct Control_input
}
};

/// Parse parameters input section.
/** Most of these parameters control the behavior of sirius::DFT_ground_state class. */
struct Parameters_input
{
/// Electronic structure method.
std::string electronic_structure_method_{"none"};

/// List of XC functions (typically contains exchange term and correlation term).
std::vector<std::string> xc_functionals_;
std::string core_relativity_{"dirac"};
std::string valence_relativity_{"zora"};

/// Type of core-states relativity in full-potential LAPW case.
std::string core_relativity_{"dirac"};

/// Type of valence states relativity in full-potential LAPW case.
std::string valence_relativity_{"zora"};

/// Number of bands.
/** In spin-collinear case this is the number of bands for each spin channel. */
Expand Down Expand Up @@ -439,11 +466,20 @@ struct Parameters_input
/// Scale muffin-tin radii automatically.
int auto_rmt_{1};

/// Regular k-point grid for the SCF ground state.
std::vector<int> ngridk_{1, 1, 1};

/// Shift in the k-point grid.
std::vector<int> shiftk_{0, 0, 0};
int num_dft_iter_{100};
double energy_tol_{1e-5};
double potential_tol_{1e-5};

/// Number of SCF iterations.
int num_dft_iter_{100};

/// Tolerance in total energy change.
double energy_tol_{1e-5};

/// Tolerance in potential RMS change.
double potential_tol_{1e-5};

/// True if this is a molecule calculation.
bool molecule_{false};
Expand All @@ -460,6 +496,7 @@ struct Parameters_input
/// True if symmetry is used.
bool use_symmetry_{true};

/// Radius on atom nearest-neighbour cluster.
double nn_radius_{-1};

/// Effective screening medium.
Expand All @@ -468,6 +505,7 @@ struct Parameters_input
/// Type of periodic boundary conditions.
std::string esm_bc_{"pbc"};

/// Reduction of the auxiliary magnetic field at each SCF step.
double reduce_aux_bf_{0.0};

void read(json const& parser)
Expand Down Expand Up @@ -590,12 +628,10 @@ struct Hubbard_input
if (!parser.count("hubbard"))
return;

orthogonalize_hubbard_orbitals_ = false;
if (parser["hubbard"].count("orthogonalize_hubbard_wave_functions")) {
orthogonalize_hubbard_orbitals_ = parser["hubbard"].value("orthogonalize_hubbard_wave_functions", orthogonalize_hubbard_orbitals_);
}

normalize_hubbard_orbitals_ = false;
if (parser["hubbard"].count("normalize_hubbard_wave_functions")) {
normalize_hubbard_orbitals_ = parser["hubbard"].value("normalize_hubbard_wave_functions", normalize_hubbard_orbitals_);
}
Expand Down
Loading

0 comments on commit 383c8dd

Please sign in to comment.