Skip to content

Commit

Permalink
checkpoint
Browse files Browse the repository at this point in the history
Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
  • Loading branch information
ggouaillardet committed Jul 4, 2019
1 parent 5b22688 commit e4dcf83
Show file tree
Hide file tree
Showing 6 changed files with 268 additions and 95 deletions.
104 changes: 42 additions & 62 deletions ompi/mca/coll/base/coll_base_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,6 @@
#include "ompi/mca/pml/pml.h"
#include "coll_base_util.h"

/*
 * Bookkeeping attached to a request whose reduction operation and/or
 * datatype must be kept alive until the request completes.
 */
struct retain_op_data {
/* completion callback that was installed on the request beforehand */
ompi_request_complete_fn_t req_complete_cb;
/* callback data that was installed on the request beforehand */
void *req_complete_cb_data;
/* operation a reference is held on, or NULL when none is held */
ompi_op_t *op;
/* datatype a reference is held on, or NULL when none is held */
ompi_datatype_t *datatype;
};

/*
 * Bookkeeping attached to a request whose send and/or receive datatype
 * must be kept alive until the request completes.
 */
struct retain_datatypes_data {
/* completion callback that was installed on the request beforehand */
ompi_request_complete_fn_t req_complete_cb;
/* callback data that was installed on the request beforehand */
void *req_complete_cb_data;
/* send datatype a reference is held on, or NULL when none is held */
ompi_datatype_t *stype;
/* receive datatype a reference is held on, or NULL when none is held */
ompi_datatype_t *rtype;
};

/*
 * Bookkeeping attached to a request that must keep a variable number of
 * datatypes alive until the request completes.
 */
struct retain_datatypes_w_data {
/* completion callback that was installed on the request beforehand */
ompi_request_complete_fn_t req_complete_cb;
/* callback data that was installed on the request beforehand */
void *req_complete_cb_data;
/* number of entries of types[] that are walked when releasing */
int count;
/* flexible array member: the datatypes a reference is held on */
ompi_datatype_t *types[];
};

int ompi_coll_base_sendrecv_actual( const void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
Expand Down Expand Up @@ -126,111 +105,112 @@ int ompi_rounddown(int num, int factor)
}

static int release_op_callback(struct ompi_request_t *request) {
struct retain_op_data * p = (struct retain_op_data *)request->req_complete_cb_data;
struct ompi_coll_base_nbc_request_t * p = (ompi_coll_base_nbc_request_t *)request;
int rc = OMPI_SUCCESS;
assert (NULL != p);
if (NULL != p->req_complete_cb) {
request->req_complete_cb = p->req_complete_cb;
request->req_complete_cb_data = p->req_complete_cb_data;
rc = request->req_complete_cb(request);
rc = request->req_complete_cb(request->req_complete_cb_data);
}
if (NULL != p->op) {
OBJ_RELEASE(p->op);
}
if (NULL != p->datatype) {
OBJ_RELEASE(p->datatype);
}
free(p);
return rc;
}

/**
 * Keep a user-defined operation and/or datatype alive until a request
 * completes.
 *
 * When op is not intrinsic or type is not predefined, a reference is taken
 * on each such object, the request's current completion callback is saved
 * inside the request, and release_op_callback() is installed in its place.
 * Nothing is done when op is intrinsic and type is predefined.
 *
 * @param req   request to attach the references to; it is accessed as an
 *              ompi_coll_base_nbc_request_t, so it must be the leading
 *              member of one.
 * @param op    reduction operation used by the collective.
 * @param type  datatype used by the collective.
 *
 * @return OMPI_SUCCESS.
 */
int ompi_coll_base_retain_op( ompi_request_t *req, ompi_op_t *op,
                              ompi_datatype_t *type) {
    ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req;
    const bool keep_op = !ompi_op_is_intrinsic(op);
    const bool keep_type = !ompi_datatype_is_predefined(type);

    if (OPAL_UNLIKELY(keep_op || keep_type)) {
        /* a NULL slot tells the release callback there is nothing to drop */
        request->op = NULL;
        request->datatype = NULL;
        if (keep_op) {
            OBJ_RETAIN(op);
            request->op = op;
        }
        if (keep_type) {
            OBJ_RETAIN(type);
            request->datatype = type;
        }
        /* save the current callback so the release callback can chain to it */
        request->req_complete_cb = req->req_complete_cb;
        request->req_complete_cb_data = req->req_complete_cb_data;
        req->req_complete_cb = release_op_callback;
        req->req_complete_cb_data = request;
    }
    return OMPI_SUCCESS;
}

static int release_datatypes_callback(struct ompi_request_t *request) {
struct retain_datatypes_data * p = (struct retain_datatypes_data *)request->req_complete_cb_data;
struct ompi_coll_base_nbc_request_t * p = (ompi_coll_base_nbc_request_t *)request;
int rc = OMPI_SUCCESS;
assert (NULL != p);
if (NULL != p->req_complete_cb) {
request->req_complete_cb = p->req_complete_cb;
request->req_complete_cb_data = p->req_complete_cb_data;
rc = request->req_complete_cb(request);
rc = request->req_complete_cb(request->req_complete_cb_data);
}
if (NULL != p->stype) {
OBJ_RELEASE(p->stype);
}
if (NULL != p->rtype) {
OBJ_RELEASE(p->rtype);
}
free(p);
return rc;
}

/**
 * Keep user-defined send/receive datatypes alive until a request completes.
 *
 * A datatype is retained when it is non-NULL and not predefined. When at
 * least one of the two is retained, the request's current completion
 * callback is saved inside the request and release_datatypes_callback()
 * is installed in its place. Otherwise nothing is done.
 *
 * @param req    request to attach the references to; it is accessed as an
 *               ompi_coll_base_nbc_request_t, so it must be the leading
 *               member of one.
 * @param stype  send datatype, may be NULL.
 * @param rtype  receive datatype, may be NULL.
 *
 * @return OMPI_SUCCESS.
 */
int ompi_coll_base_retain_datatypes( ompi_request_t *req, ompi_datatype_t *stype,
                                     ompi_datatype_t *rtype) {
    ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req;
    const bool keep_stype = NULL != stype && !ompi_datatype_is_predefined(stype);
    const bool keep_rtype = NULL != rtype && !ompi_datatype_is_predefined(rtype);

    if (OPAL_UNLIKELY(keep_stype || keep_rtype)) {
        /* a NULL slot tells the release callback there is nothing to drop */
        request->stype = NULL;
        request->rtype = NULL;
        if (keep_stype) {
            OBJ_RETAIN(stype);
            request->stype = stype;
        }
        if (keep_rtype) {
            OBJ_RETAIN(rtype);
            request->rtype = rtype;
        }
        /* save the current callback so the release callback can chain to it */
        request->req_complete_cb = req->req_complete_cb;
        request->req_complete_cb_data = req->req_complete_cb_data;
        req->req_complete_cb = release_datatypes_callback;
        req->req_complete_cb_data = request;
    }
    return OMPI_SUCCESS;
}

/**
 * Completion callback installed by ompi_coll_base_retain_datatypes_w().
 *
 * Chains to the completion callback saved in the enclosing
 * ompi_coll_base_nbc_request_t (if any), then drops the reference held on
 * each of the first `count` entries of `types` and frees the array.
 *
 * @param request  the completed request; it is accessed as an
 *                 ompi_coll_base_nbc_request_t, so it must be the leading
 *                 member of one.
 *
 * @return the value returned by the saved callback, or OMPI_SUCCESS when
 *         no callback had been saved.
 */
static int release_datatypes_w_callback(struct ompi_request_t *request) {
    ompi_coll_base_nbc_request_t *p = (ompi_coll_base_nbc_request_t *)request;
    int rc = OMPI_SUCCESS;

    assert (NULL != p);
    /* Test the *saved* callback: request->req_complete_cb is the slot this
     * very function was installed in, so testing it would not protect the
     * call below against a NULL saved callback. */
    if (NULL != p->req_complete_cb) {
        request->req_complete_cb = p->req_complete_cb;
        request->req_complete_cb_data = p->req_complete_cb_data;
        /* NOTE(review): the saved callback is handed the saved callback data
         * rather than the request pointer - confirm this is what the saved
         * callback expects. */
        rc = request->req_complete_cb(request->req_complete_cb_data);
    }
    for (int i = 0; i < p->count; i++) {
        OBJ_RELEASE(p->types[i]);
    }
    /* only the array is heap allocated; the bookkeeping itself is part of
     * the request. Reset both fields so a reused request cannot walk or
     * free the stale array again. */
    free(p->types);
    p->types = NULL;
    p->count = 0;
    return rc;
}

/**
 * Keep the user-defined datatypes of a "w" style collective alive until a
 * request completes.
 *
 * Every non-NULL, non-predefined entry of stypes[] and rtypes[] is retained
 * and recorded in an array allocated for the request. When at least one
 * datatype is retained, the request's current completion callback is saved
 * inside the request and release_datatypes_w_callback() is installed in its
 * place. Otherwise nothing is done.
 *
 * @param req     request to attach the references to; it is accessed as an
 *                ompi_coll_base_nbc_request_t, so it must be the leading
 *                member of one.
 * @param count   number of entries in stypes[] and in rtypes[].
 * @param stypes  send datatypes; entries may be NULL.
 * @param rtypes  receive datatypes; entries may be NULL.
 *
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE when the array of
 *         retained datatypes cannot be allocated (in which case nothing
 *         has been retained and the request is left untouched apart from
 *         its `types` field).
 */
int ompi_coll_base_retain_datatypes_w( ompi_request_t *req, int count,
                                       ompi_datatype_t *const stypes[], ompi_datatype_t *const rtypes[]) {
    ompi_coll_base_nbc_request_t *request = (ompi_coll_base_nbc_request_t *)req;
    int datatypes = 0;

    /* first pass: count the entries the second pass is going to store */
    for (int i = 0; i < count; i++) {
        if (NULL != stypes[i] && !ompi_datatype_is_predefined(stypes[i])) {
            datatypes++;
        }
        if (NULL != rtypes[i] && !ompi_datatype_is_predefined(rtypes[i])) {
            datatypes++;
        }
    }
    if (OPAL_UNLIKELY(0 < datatypes)) {
        request->types = (ompi_datatype_t **)calloc((size_t)datatypes, sizeof(ompi_datatype_t *));
        if (OPAL_UNLIKELY(NULL == request->types)) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        /* second pass: record and retain, reusing datatypes as the index */
        datatypes = 0;
        for (int i = 0; i < count; i++) {
            if (NULL != stypes[i] && !ompi_datatype_is_predefined(stypes[i])) {
                request->types[datatypes++] = stypes[i];
                OBJ_RETAIN(stypes[i]);
            }
            if (NULL != rtypes[i] && !ompi_datatype_is_predefined(rtypes[i])) {
                request->types[datatypes++] = rtypes[i];
                OBJ_RETAIN(rtypes[i]);
            }
        }
        /* the release callback walks types[0 .. count-1]; without this the
         * retained datatypes would never be released (or a stale count
         * would be used) */
        request->count = datatypes;
        /* save the current callback so the release callback can chain to it */
        request->req_complete_cb = req->req_complete_cb;
        request->req_complete_cb_data = req->req_complete_cb_data;
        req->req_complete_cb = release_datatypes_w_callback;
        req->req_complete_cb_data = request;
    }
    return OMPI_SUCCESS;
}
16 changes: 15 additions & 1 deletion ompi/mca/coll/base/coll_base_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,20 @@

BEGIN_C_DECLS

/*
 * Request type shared by non-blocking collective implementations.
 *
 * It extends ompi_request_t with the bookkeeping the
 * ompi_coll_base_retain_*() helpers need to keep operations and datatypes
 * referenced until the request completes.
 */
struct ompi_coll_base_nbc_request_t {
/* base request; kept as the first member because the helpers cast an
 * ompi_request_t pointer to this type */
ompi_request_t super;
/* completion callback saved from the base request when a release
 * callback is installed in its place */
ompi_request_complete_fn_t req_complete_cb;
/* callback data saved together with req_complete_cb */
void *req_complete_cb_data;
/* operation retained by ompi_coll_base_retain_op(), or NULL */
ompi_op_t *op;
/* datatype retained by ompi_coll_base_retain_op(), or NULL */
ompi_datatype_t *datatype;
/* send datatype retained by ompi_coll_base_retain_datatypes(), or NULL */
ompi_datatype_t *stype;
/* receive datatype retained by ompi_coll_base_retain_datatypes(), or NULL */
ompi_datatype_t *rtype;
/* heap array of datatypes retained by ompi_coll_base_retain_datatypes_w() */
ompi_datatype_t **types;
/* number of entries of types[] walked by the release callback */
int count;
};

typedef struct ompi_coll_base_nbc_request_t ompi_coll_base_nbc_request_t;

/**
* A MPI_like function doing a send and a receive simultaneously.
* If one of the communications results in a zero-byte message the
Expand Down Expand Up @@ -86,7 +100,7 @@ unsigned int ompi_mirror_perm(unsigned int x, int nbits);
int ompi_rounddown(int num, int factor);

/**
 * Keep a non-intrinsic operation and/or a non-predefined datatype
 * referenced until the request completes.
 *
 * @param request  request the references are attached to; it must be the
 *                 leading member of an ompi_coll_base_nbc_request_t.
 * @param op       reduction operation used by the collective.
 * @param type     datatype used by the collective.
 *
 * @return OMPI_SUCCESS on success, an OMPI error code otherwise.
 */
int ompi_coll_base_retain_op( ompi_request_t *request, ompi_op_t *op,
                              ompi_datatype_t *type);

/**
 * Keep non-predefined send/receive datatypes referenced until the request
 * completes.
 *
 * @param request  request the references are attached to; it must be the
 *                 leading member of an ompi_coll_base_nbc_request_t.
 * @param stype    send datatype, may be NULL.
 * @param rtype    receive datatype, may be NULL.
 *
 * @return OMPI_SUCCESS on success, an OMPI error code otherwise.
 */
int ompi_coll_base_retain_datatypes( ompi_request_t *request, ompi_datatype_t *stype,
ompi_datatype_t *rtype);
Expand Down
14 changes: 7 additions & 7 deletions ompi/mca/coll/libnbc/coll_libnbc.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
* $COPYRIGHT$
Expand All @@ -28,7 +28,7 @@
#define MCA_COLL_LIBNBC_EXPORT_H

#include "ompi/mca/coll/coll.h"
#include "ompi/request/request.h"
#include "ompi/mca/coll/base/coll_base_util.h"
#include "opal/sys/atomic.h"

BEGIN_C_DECLS
Expand Down Expand Up @@ -121,7 +121,7 @@ typedef struct NBC_Schedule NBC_Schedule;
OBJ_CLASS_DECLARATION(NBC_Schedule);

struct ompi_coll_libnbc_request_t {
ompi_request_t super;
ompi_coll_base_nbc_request_t super;
MPI_Comm comm;
long row_offset;
bool nbc_complete; /* status in libnbc level */
Expand All @@ -145,13 +145,13 @@ typedef ompi_coll_libnbc_request_t NBC_Handle;
opal_free_list_item_t *item; \
item = opal_free_list_wait (&mca_coll_libnbc_component.requests); \
req = (ompi_coll_libnbc_request_t*) item; \
OMPI_REQUEST_INIT(&req->super, persistent); \
req->super.req_mpi_object.comm = comm; \
OMPI_REQUEST_INIT(&req->super.super, persistent); \
req->super.super.req_mpi_object.comm = comm; \
} while (0)

/*
 * Finalize the ompi_request_t embedded in a libnbc request and hand the
 * request back to the component free list.
 *
 * The libnbc request embeds an ompi_coll_base_nbc_request_t, which in turn
 * embeds the ompi_request_t, hence the double `super`.
 */
#define OMPI_COLL_LIBNBC_REQUEST_RETURN(req)                            \
    do {                                                                \
        OMPI_REQUEST_FINI(&(req)->super.super);                         \
        opal_free_list_return (&mca_coll_libnbc_component.requests,     \
                               (opal_free_list_item_t*) (req));         \
    } while (0)
Expand Down
30 changes: 15 additions & 15 deletions ompi/mca/coll/libnbc/coll_libnbc_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
* Copyright (c) 2017 Ian Bradley Morgan and Anthony Skjellum. All
* rights reserved.
Expand Down Expand Up @@ -448,21 +448,21 @@ ompi_coll_libnbc_progress(void)
/* done, remove and complete */
OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock);
opal_list_remove_item(&mca_coll_libnbc_component.active_requests,
&request->super.super.super);
&request->super.super.super.super);
OPAL_THREAD_UNLOCK(&mca_coll_libnbc_component.lock);

if( OMPI_SUCCESS == res || NBC_OK == res || NBC_SUCCESS == res ) {
request->super.req_status.MPI_ERROR = OMPI_SUCCESS;
request->super.super.req_status.MPI_ERROR = OMPI_SUCCESS;
}
else {
request->super.req_status.MPI_ERROR = res;
request->super.super.req_status.MPI_ERROR = res;
}
if(request->super.req_persistent) {
if(request->super.super.req_persistent) {
/* reset for the next communication */
request->row_offset = 0;
}
if(!request->super.req_persistent || !REQUEST_COMPLETE(&request->super)) {
ompi_request_complete(&request->super, true);
if(!request->super.super.req_persistent || !REQUEST_COMPLETE(&request->super.super)) {
ompi_request_complete(&request->super.super, true);
}
}
OPAL_THREAD_LOCK(&mca_coll_libnbc_component.lock);
Expand Down Expand Up @@ -527,7 +527,7 @@ request_start(size_t count, ompi_request_t ** requests)
NBC_DEBUG(5, "tmpbuf address=%p size=%u\n", handle->tmpbuf, sizeof(handle->tmpbuf));
NBC_DEBUG(5, "--------------------------------\n");

handle->super.req_complete = REQUEST_PENDING;
handle->super.super.req_complete = REQUEST_PENDING;
handle->nbc_complete = false;

res = NBC_Start(handle);
Expand Down Expand Up @@ -557,7 +557,7 @@ request_free(struct ompi_request_t **ompi_req)
ompi_coll_libnbc_request_t *request =
(ompi_coll_libnbc_request_t*) *ompi_req;

if( !REQUEST_COMPLETE(&request->super) ) {
if( !REQUEST_COMPLETE(&request->super.super) ) {
return MPI_ERR_REQUEST;
}

Expand All @@ -571,11 +571,11 @@ request_free(struct ompi_request_t **ompi_req)
/*
 * Set the fixed fields of the ompi_request_t embedded in a libnbc request:
 * its type, a cleared cancelled flag, and the start/free/cancel entry
 * points of this component.
 *
 * The base request sits two levels down (libnbc request ->
 * ompi_coll_base_nbc_request_t -> ompi_request_t).
 */
static void
request_construct(ompi_coll_libnbc_request_t *request)
{
    ompi_request_t *base = &request->super.super;

    base->req_type = OMPI_REQUEST_COLL;
    base->req_status._cancelled = 0;
    base->req_start = request_start;
    base->req_free = request_free;
    base->req_cancel = request_cancel;
}


Expand Down
Loading

0 comments on commit e4dcf83

Please sign in to comment.