Skip to content

Commit

Permalink
Add memory pool for ellblock
Browse files Browse the repository at this point in the history
  • Loading branch information
jeanlucf22 committed Jul 13, 2020
1 parent fb5c949 commit 5b6655f
Show file tree
Hide file tree
Showing 18 changed files with 292 additions and 67 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,11 @@ if(NOT (NOBLAS))
endif()
endif()

# Optional memory pool for the ellblock matrix format.
# When BML_ELLBLOCK_MEMPOOL is true, pass -DBML_ELLBLOCK_USE_MEMPOOL to the
# compiler so sources guarded by `#ifdef BML_ELLBLOCK_USE_MEMPOOL` take the
# pool-based code path.
if(BML_ELLBLOCK_MEMPOOL)
  message(STATUS "Use memory pool for ellblock")
  add_definitions(-DBML_ELLBLOCK_USE_MEMPOOL)
endif()

add_definitions(-D_POSIX_C_SOURCE=200112L)

check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN)
Expand Down
3 changes: 3 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ EOF
echo "BML_MAGMA Build with MAGMA (default is ${BML_MAGMA})"
echo "BML_CUSOLVER Build with cuSOLVER (default is ${BML_CUSOLVER})"
echo "BML_XSMM Build with XSMM (default is ${BML_XSMM})"
echo "BML_ELLBLOCK_MEMPOOL Use ellblock memory pool (default is ${BML_ELLBLOCK_MEMPOOL})"
echo "CUDA_TOOLKIT_ROOT_DIR Path to CUDA dir (default is ${CUDA_TOOLKIT_ROOT_DIR})"
echo "INTEL_OPT {yes, no} (default is ${INTEL_OPT})"
}
Expand Down Expand Up @@ -104,6 +105,7 @@ set_defaults() {
: ${BML_MAGMA:=no}
: ${BML_CUSOLVER:=no}
: ${BML_XSMM:=no}
: ${BML_ELLBLOCK_MEMPOOL:=yes}
: ${CUDA_TOOLKIT_ROOT_DIR:=}
: ${INTEL_OPT:=no}
}
Expand Down Expand Up @@ -179,6 +181,7 @@ configure() {
-DBML_MAGMA="${BML_MAGMA}" \
-DBML_CUSOLVER="${BML_CUSOLVER}" \
-DBML_XSMM="${BML_XSMM}" \
-DBML_ELLBLOCK_MEMPOOL="${BML_ELLBLOCK_MEMPOOL}" \
-DCUDA_TOOLKIT_ROOT_DIR="${CUDA_TOOLKIT_ROOT_DIR}" \
-DINTEL_OPT="${INTEL_OPT:=no}" \
| tee --append "${LOG_FILE}"
Expand Down
5 changes: 3 additions & 2 deletions src/C-interface/bml_allocate.c
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ bml_block_matrix(
bml_matrix_precision_t matrix_precision,
int NB,
int MB,
int M,
int *bsizes,
bml_distribution_mode_t distrib_mode)
{
Expand All @@ -321,11 +322,11 @@ bml_block_matrix(
{
case ellpack:
return bml_block_matrix_ellblock(matrix_precision,
NB, MB, bsizes, distrib_mode);
NB, MB, M, bsizes, distrib_mode);
break;
case ellblock:
return bml_block_matrix_ellblock(matrix_precision,
NB, MB, bsizes, distrib_mode);
NB, MB, M, bsizes, distrib_mode);
break;
default:
LOG_ERROR("unsupported matrix type (type ID %d)\n", matrix_type);
Expand Down
4 changes: 2 additions & 2 deletions src/C-interface/ellblock/bml_add_ellblock_typed.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ void TYPED_FUNC(
{
A_ptr_value[kb]
=
bml_noinit_allocate_memory(nelements *
sizeof(REAL_T));
TYPED_FUNC(bml_allocate_block_ellblock) (A, ib,
nelements);
}
for (int kk = 0; kk < nelements; kk++)
{
Expand Down
15 changes: 11 additions & 4 deletions src/C-interface/ellblock/bml_allocate_ellblock.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,18 @@ void
bml_deallocate_ellblock(
bml_matrix_ellblock_t * A)
{
#ifdef BML_ELLBLOCK_USE_MEMPOOL
bml_free_memory(A->memory_pool_offsets);
bml_free_memory(A->memory_pool_ptr);
bml_free_memory(A->memory_pool);
#else
for (int ib = 0; ib < A->NB; ib++)
for (int jp = 0; jp < A->nnzb[ib]; jp++)
{
int ind = ROWMAJOR(ib, jp, A->NB, A->MB);
bml_free_memory(A->ptr_value[ind]);
}
#endif
bml_free_memory(A->ptr_value);
bml_free_memory(A->indexb);
bml_free_memory(A->nnzb);
Expand Down Expand Up @@ -246,6 +252,7 @@ bml_block_matrix_ellblock(
bml_matrix_precision_t matrix_precision,
int NB,
int MB,
int M,
int *bsizes,
bml_distribution_mode_t distrib_mode)
{
Expand All @@ -254,19 +261,19 @@ bml_block_matrix_ellblock(
switch (matrix_precision)
{
case single_real:
A = bml_block_matrix_ellblock_single_real(NB, MB, bsizes,
A = bml_block_matrix_ellblock_single_real(NB, MB, M, bsizes,
distrib_mode);
break;
case double_real:
A = bml_block_matrix_ellblock_double_real(NB, MB, bsizes,
A = bml_block_matrix_ellblock_double_real(NB, MB, M, bsizes,
distrib_mode);
break;
case single_complex:
A = bml_block_matrix_ellblock_single_complex(NB, MB, bsizes,
A = bml_block_matrix_ellblock_single_complex(NB, MB, M, bsizes,
distrib_mode);
break;
case double_complex:
A = bml_block_matrix_ellblock_double_complex(NB, MB, bsizes,
A = bml_block_matrix_ellblock_double_complex(NB, MB, M, bsizes,
distrib_mode);
break;
default:
Expand Down
39 changes: 39 additions & 0 deletions src/C-interface/ellblock/bml_allocate_ellblock.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,30 +158,35 @@ bml_matrix_ellblock_t *bml_block_matrix_ellblock(
bml_matrix_precision_t matrix_precision,
int NB,
int MB,
int M,
int *bsizes,
bml_distribution_mode_t distrib_mode);

bml_matrix_ellblock_t *bml_block_matrix_ellblock_single_real(
int NB,
int MB,
int M,
int *bsizes,
bml_distribution_mode_t distrib_mode);

bml_matrix_ellblock_t *bml_block_matrix_ellblock_double_real(
int NB,
int MB,
int M,
int *bsizes,
bml_distribution_mode_t distrib_mode);

bml_matrix_ellblock_t *bml_block_matrix_ellblock_single_complex(
int NB,
int MB,
int M,
int *bsizes,
bml_distribution_mode_t distrib_mode);

bml_matrix_ellblock_t *bml_block_matrix_ellblock_double_complex(
int NB,
int MB,
int M,
int *bsizes,
bml_distribution_mode_t distrib_mode);

Expand All @@ -204,4 +209,38 @@ int bml_get_nb(
);
int count_nelements(
bml_matrix_ellblock_t * A);

void *bml_allocate_block_ellblock_single_real(
bml_matrix_ellblock_t * A,
const int ib,
const int nelements);
void *bml_allocate_block_ellblock_double_real(
bml_matrix_ellblock_t * A,
const int ib,
const int nelements);
void *bml_allocate_block_ellblock_single_complex(
bml_matrix_ellblock_t * A,
const int ib,
const int nelements);
void *bml_allocate_block_ellblock_double_complex(
bml_matrix_ellblock_t * A,
const int ib,
const int nelements);

void bml_free_block_ellblock_single_real(
bml_matrix_ellblock_t * A,
const int ib,
const int jb);
void bml_free_block_ellblock_double_real(
bml_matrix_ellblock_t * A,
const int ib,
const int jb);
void bml_free_block_ellblock_single_complex(
bml_matrix_ellblock_t * A,
const int ib,
const int jb);
void bml_free_block_ellblock_double_complex(
bml_matrix_ellblock_t * A,
const int ib,
const int jb);
#endif
Loading

0 comments on commit 5b6655f

Please sign in to comment.