Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to non-blocking MPI recv in multiplication #556

Merged
merged 1 commit into from
Nov 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions src/C-interface/bml_parallel.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,62 @@ bml_mpi_recv(
case csr:
bml_mpi_recv_csr(A, src, comm);
break;
default:
LOG_ERROR("bml_mpi_recv - unknown matrix type\n");
break;
}
}

/** Start a non-blocking receive of a matrix from another MPI task.
 *
 * Looks up the storage format of A with bml_get_type() and forwards the
 * call to the matching format-specific routine. An unrecognized format is
 * reported through LOG_ERROR.
 *
 * \param A The matrix to receive into
 * \param src The MPI rank to receive from
 * \param comm The MPI communicator
 */
void
bml_mpi_irecv(
    bml_matrix_t * A,
    const int src,
    MPI_Comm comm)
{
    switch (bml_get_type(A))
    {
        case dense:
            bml_mpi_irecv_dense(A, src, comm);
            return;
        case ellpack:
            bml_mpi_irecv_ellpack(A, src, comm);
            return;
        case ellsort:
            bml_mpi_irecv_ellsort(A, src, comm);
            return;
        case ellblock:
            bml_mpi_irecv_ellblock(A, src, comm);
            return;
        case csr:
            bml_mpi_irecv_csr(A, src, comm);
            return;
        default:
            LOG_ERROR("bml_mpi_irecv - unknown matrix type\n");
            return;
    }
}

void
bml_mpi_irecv_complete(
bml_matrix_t * A)
{
switch (bml_get_type(A))
{
case dense:
bml_mpi_irecv_complete_dense(A);
break;
case ellpack:
bml_mpi_irecv_complete_ellpack(A);
break;
case ellsort:
bml_mpi_irecv_complete_ellsort(A);
break;
case ellblock:
bml_mpi_irecv_complete_ellblock(A);
break;
case csr:
bml_mpi_irecv_complete_csr(A);
break;
default:
LOG_ERROR("unknown matrix type\n");
break;
Expand Down
6 changes: 6 additions & 0 deletions src/C-interface/bml_parallel.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ void bml_mpi_recv(
bml_matrix_t * A,
const int dst,
MPI_Comm comm);
/** Start a non-blocking receive of matrix A from another MPI task.
 *
 * The rank argument is the task the data comes from, so it is named src
 * here to agree with the definition in bml_parallel.c.
 *
 * \param A The matrix to receive into
 * \param src The MPI rank to receive from
 * \param comm The MPI communicator
 */
void bml_mpi_irecv(
    bml_matrix_t * A,
    const int src,
    MPI_Comm comm);
/** Finish a receive previously started on A.
 *
 * Dispatches on the matrix type of A to the format-specific
 * *_irecv_complete_* routine.
 * NOTE(review): presumably must be paired with a prior bml_mpi_irecv()
 * on the same matrix - confirm against callers.
 *
 * \param A The matrix being received into
 */
void bml_mpi_irecv_complete(
bml_matrix_t * A);
bml_matrix_t *bml_mpi_recv_matrix(
bml_matrix_type_t matrix_type,
bml_matrix_precision_t matrix_precision,
Expand Down
50 changes: 50 additions & 0 deletions src/C-interface/csr/bml_parallel_csr.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,56 @@ bml_mpi_recv_csr(
}
}

/** Start a non-blocking receive of a CSR matrix.
 *
 * Selects the precision-specific implementation from A->matrix_precision.
 * An unrecognized precision is reported through LOG_ERROR.
 *
 * The rank argument is the task the data comes from; it is named src to
 * agree with the prototype in bml_parallel_csr.h and with the typed
 * implementations.
 *
 * \param A The matrix to receive into
 * \param src The MPI rank to receive from
 * \param comm The MPI communicator
 */
void
bml_mpi_irecv_csr(
    bml_matrix_csr_t * A,
    const int src,
    MPI_Comm comm)
{
    switch (A->matrix_precision)
    {
        case single_real:
            bml_mpi_irecv_csr_single_real(A, src, comm);
            break;
        case double_real:
            bml_mpi_irecv_csr_double_real(A, src, comm);
            break;
        case single_complex:
            bml_mpi_irecv_csr_single_complex(A, src, comm);
            break;
        case double_complex:
            bml_mpi_irecv_csr_double_complex(A, src, comm);
            break;
        default:
            LOG_ERROR("unknown precision\n");
            break;
    }
}

/** Finish a non-blocking receive of a CSR matrix.
 *
 * Forwards to the implementation matching A->matrix_precision. An
 * unrecognized precision is reported through LOG_ERROR.
 *
 * \param A The matrix being received into
 */
void
bml_mpi_irecv_complete_csr(
    bml_matrix_csr_t * A)
{
    switch (A->matrix_precision)
    {
        case single_real:
            bml_mpi_irecv_complete_csr_single_real(A);
            return;
        case double_real:
            bml_mpi_irecv_complete_csr_double_real(A);
            return;
        case single_complex:
            bml_mpi_irecv_complete_csr_single_complex(A);
            return;
        case double_complex:
            bml_mpi_irecv_complete_csr_double_complex(A);
            return;
        default:
            LOG_ERROR("unknown precision\n");
            return;
    }
}

bml_matrix_csr_t *
bml_mpi_recv_matrix_csr(
bml_matrix_precision_t matrix_precision,
Expand Down
34 changes: 34 additions & 0 deletions src/C-interface/csr/bml_parallel_csr.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,40 @@ void bml_mpi_recv_csr_double_complex(
const int src,
MPI_Comm comm);

/** Start a non-blocking receive of a CSR matrix.
 *
 * Dispatches on A->matrix_precision to one of the precision-specific
 * bml_mpi_irecv_csr_* routines.
 *
 * \param A The matrix to receive into
 * \param src The MPI rank to receive from
 * \param comm The MPI communicator
 */
void bml_mpi_irecv_csr(
bml_matrix_csr_t * A,
const int src,
MPI_Comm comm);

/*
 * Precision-specific variants that bml_mpi_irecv_csr() selects from
 * A->matrix_precision. They take the same arguments as the dispatcher.
 * NOTE(review): presumably generated from the TYPED_FUNC(bml_mpi_irecv_csr)
 * template in bml_parallel_csr_typed.c - confirm.
 */
void bml_mpi_irecv_csr_single_real(
bml_matrix_csr_t * A,
const int src,
MPI_Comm comm);
void bml_mpi_irecv_csr_double_real(
bml_matrix_csr_t * A,
const int src,
MPI_Comm comm);
void bml_mpi_irecv_csr_single_complex(
bml_matrix_csr_t * A,
const int src,
MPI_Comm comm);
void bml_mpi_irecv_csr_double_complex(
bml_matrix_csr_t * A,
const int src,
MPI_Comm comm);

/** Finish a non-blocking receive of a CSR matrix.
 *
 * Dispatches on A->matrix_precision to one of the precision-specific
 * bml_mpi_irecv_complete_csr_* routines.
 *
 * \param A The matrix being received into
 */
void bml_mpi_irecv_complete_csr(
bml_matrix_csr_t * A);

/*
 * Precision-specific variants that bml_mpi_irecv_complete_csr() selects
 * from A->matrix_precision.
 * NOTE(review): presumably generated from the
 * TYPED_FUNC(bml_mpi_irecv_complete_csr) template - confirm.
 */
void bml_mpi_irecv_complete_csr_single_real(
bml_matrix_csr_t * A);
void bml_mpi_irecv_complete_csr_double_real(
bml_matrix_csr_t * A);
void bml_mpi_irecv_complete_csr_single_complex(
bml_matrix_csr_t * A);
void bml_mpi_irecv_complete_csr_double_complex(
bml_matrix_csr_t * A);

bml_matrix_csr_t *bml_mpi_recv_matrix_csr(
bml_matrix_precision_t matrix_precision,
int N,
Expand Down
52 changes: 52 additions & 0 deletions src/C-interface/csr/bml_parallel_csr_typed.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,58 @@ void TYPED_FUNC(
bml_free_memory(values);
}

/** Post the non-blocking receives that carry a CSR matrix.
 *
 * Three receive buffers are allocated and stored in A (nnz_buffer,
 * cols_buffer and buffer), and one MPI_Irecv is posted for each, using
 * tags 111 (non-zero count per row), 112 (column indexes) and 113
 * (matrix values). The requests are stored in A->req[0..2]. The buffers
 * are not released here.
 *
 * \param A The matrix to receive into
 * \param src The MPI rank to receive from
 * \param comm The MPI communicator
 */
void TYPED_FUNC(
    bml_mpi_irecv_csr) (
    bml_matrix_csr_t * A,
    const int src,
    MPI_Comm comm)
{
    // request 0: number of non-zeros in each row
    A->nnz_buffer = bml_allocate_memory(sizeof(int) * A->N_);
    int rc = MPI_Irecv(A->nnz_buffer, A->N_, MPI_INT, src, 111, comm,
                       &A->req[0]);
    if (rc != MPI_SUCCESS)
    {
        LOG_ERROR("MPI_Irecv failed for nnz");
    }

    // the incoming size is not known yet, so size the remaining buffers
    // from an estimate of the total number of non-zeros
    int capacity = A->NZMAX_ * A->N_ * 2;

    // request 1: column indexes
    A->cols_buffer = bml_allocate_memory(sizeof(int) * capacity);
    rc = MPI_Irecv(A->cols_buffer, capacity, MPI_INT, src, 112, comm,
                   &A->req[1]);
    if (rc != MPI_SUCCESS)
    {
        LOG_ERROR("MPI_Irecv failed for cols");
    }

    // request 2: matrix elements
    A->buffer = bml_allocate_memory(sizeof(REAL_T) * capacity);
    rc = MPI_Irecv(A->buffer, capacity, MPI_T, src, 113, comm, &A->req[2]);
    if (rc != MPI_SUCCESS)
    {
        LOG_ERROR("MPI_Irecv failed for values");
    }
}

/** Complete the non-blocking receive of a CSR matrix.
 *
 * Waits for the three requests stored in A->req, copies the received
 * rows from the receive buffers into A one row at a time, then frees
 * the receive buffers (nnz_buffer, buffer, cols_buffer).
 *
 * \param A The matrix being received into
 */
void TYPED_FUNC(
    bml_mpi_irecv_complete_csr) (
    bml_matrix_csr_t * A)
{
    // MPI_Waitall takes an array of statuses, so the array form of the
    // "ignore" constant (MPI_STATUSES_IGNORE) is the one to pass here.
    int mpiret = MPI_Waitall(3, A->req, MPI_STATUSES_IGNORE);
    if (mpiret != MPI_SUCCESS)
    {
        LOG_ERROR("MPI_Waitall failed\n");
    }

    // move data from receive buffer into matrix; rows are packed back to
    // back, so both cursors advance by the row's non-zero count
    REAL_T *pvalues = A->buffer;
    int *pcols = A->cols_buffer;
    for (int i = 0; i < A->N_; i++)
    {
        TYPED_FUNC(bml_set_sparse_row_csr) (A, i, A->nnz_buffer[i], pcols,
                                            pvalues, 0.);
        pvalues += A->nnz_buffer[i];
        pcols += A->nnz_buffer[i];
    }

    bml_free_memory(A->nnz_buffer);
    bml_free_memory(A->buffer);
    bml_free_memory(A->cols_buffer);
}

/*
* Return BML matrix from data received from MPI task src
*/
Expand Down
11 changes: 11 additions & 0 deletions src/C-interface/csr/bml_types_csr.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

#include "../bml_types.h"

#ifdef DO_MPI
#include <mpi.h>
#endif

#define INIT_ROW_SPACE 10
#define EXPAND_FACT 1.3
Expand Down Expand Up @@ -80,6 +83,14 @@ struct bml_matrix_csr_t
bml_domain_t *domain;
/** A copy of the domain decomposition. */
bml_domain_t *domain2;
#ifdef DO_MPI
/** Receive buffers for non-blocking communications: matrix values, non-zero count per row, column indexes */
void *buffer;
int *nnz_buffer;
int *cols_buffer;
/** Requests for the non-blocking MPI receives, one per receive buffer */
MPI_Request req[3];
#endif
};
typedef struct bml_matrix_csr_t bml_matrix_csr_t;

Expand Down
50 changes: 50 additions & 0 deletions src/C-interface/dense/bml_parallel_dense.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,56 @@ bml_mpi_recv_dense(
}
}

/** Start a non-blocking receive of a dense matrix.
 *
 * Selects the precision-specific implementation from A->matrix_precision.
 * An unrecognized precision is reported through LOG_ERROR.
 *
 * The rank argument is the task the data comes from; it is named src to
 * agree with the prototype in bml_parallel_dense.h.
 *
 * \param A The matrix to receive into
 * \param src The MPI rank to receive from
 * \param comm The MPI communicator
 */
void
bml_mpi_irecv_dense(
    bml_matrix_dense_t * A,
    const int src,
    MPI_Comm comm)
{
    switch (A->matrix_precision)
    {
        case single_real:
            bml_mpi_irecv_dense_single_real(A, src, comm);
            break;
        case double_real:
            bml_mpi_irecv_dense_double_real(A, src, comm);
            break;
        case single_complex:
            bml_mpi_irecv_dense_single_complex(A, src, comm);
            break;
        case double_complex:
            bml_mpi_irecv_dense_double_complex(A, src, comm);
            break;
        default:
            LOG_ERROR("unknown precision\n");
            break;
    }
}

/** Finish a non-blocking receive of a dense matrix.
 *
 * Forwards to the implementation matching A->matrix_precision. An
 * unrecognized precision is reported through LOG_ERROR.
 *
 * \param A The matrix being received into
 */
void
bml_mpi_irecv_complete_dense(
    bml_matrix_dense_t * A)
{
    switch (A->matrix_precision)
    {
        case single_real:
            bml_mpi_irecv_complete_dense_single_real(A);
            return;
        case double_real:
            bml_mpi_irecv_complete_dense_double_real(A);
            return;
        case single_complex:
            bml_mpi_irecv_complete_dense_single_complex(A);
            return;
        case double_complex:
            bml_mpi_irecv_complete_dense_double_complex(A);
            return;
        default:
            LOG_ERROR("unknown precision\n");
            return;
    }
}

bml_matrix_dense_t *
bml_mpi_recv_matrix_dense(
bml_matrix_precision_t matrix_precision,
Expand Down
34 changes: 34 additions & 0 deletions src/C-interface/dense/bml_parallel_dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,40 @@ void bml_mpi_recv_dense_double_complex(
const int src,
MPI_Comm comm);

/** Start a non-blocking receive of a dense matrix.
 *
 * Dispatches on A->matrix_precision to one of the precision-specific
 * bml_mpi_irecv_dense_* routines.
 *
 * \param A The matrix to receive into
 * \param src The MPI rank to receive from
 * \param comm The MPI communicator
 */
void bml_mpi_irecv_dense(
bml_matrix_dense_t * A,
const int src,
MPI_Comm comm);

/*
 * Precision-specific variants that bml_mpi_irecv_dense() selects from
 * A->matrix_precision. They take the same arguments as the dispatcher.
 * NOTE(review): their implementations are not visible here; presumably
 * they post MPI_Irecv calls as the CSR variant does - confirm.
 */
void bml_mpi_irecv_dense_single_real(
bml_matrix_dense_t * A,
const int src,
MPI_Comm comm);
void bml_mpi_irecv_dense_double_real(
bml_matrix_dense_t * A,
const int src,
MPI_Comm comm);
void bml_mpi_irecv_dense_single_complex(
bml_matrix_dense_t * A,
const int src,
MPI_Comm comm);
void bml_mpi_irecv_dense_double_complex(
bml_matrix_dense_t * A,
const int src,
MPI_Comm comm);

/** Finish a non-blocking receive of a dense matrix.
 *
 * Dispatches on A->matrix_precision to one of the precision-specific
 * bml_mpi_irecv_complete_dense_* routines.
 *
 * \param A The matrix being received into
 */
void bml_mpi_irecv_complete_dense(
bml_matrix_dense_t * A);

/*
 * Precision-specific variants that bml_mpi_irecv_complete_dense() selects
 * from A->matrix_precision.
 * NOTE(review): their implementations are not visible here - confirm
 * what each one waits on and releases.
 */
void bml_mpi_irecv_complete_dense_single_real(
bml_matrix_dense_t * A);
void bml_mpi_irecv_complete_dense_double_real(
bml_matrix_dense_t * A);
void bml_mpi_irecv_complete_dense_single_complex(
bml_matrix_dense_t * A);
void bml_mpi_irecv_complete_dense_double_complex(
bml_matrix_dense_t * A);

bml_matrix_dense_t *bml_mpi_recv_matrix_dense(
bml_matrix_precision_t matrix_precision,
int N,
Expand Down
Loading