-
Notifications
You must be signed in to change notification settings - Fork 885
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix error handling for generalized requests #12392
base: main
Are you sure you want to change the base?
Changes from all commits
59c2af8
16c6381
adb640d
40a500d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,33 +24,55 @@ | |
#include "ompi/request/grequest.h" | ||
#include "ompi/mpi/fortran/base/fint_2_int.h" | ||
|
||
/** | ||
/* | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please revert this change (since it's not relevant) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you mean? I'm fixing a inconsistency in the comment style respect to the other block comments in the same source file. Am I supposed to submit a separate PR for such trivial fixes? Or should these inconsistencies left alone to live forever? |
||
* Internal function to specialize the call to the user provided free_fn | ||
* for generalized requests. | ||
* @return The return value of the user specified callback or MPI_SUCCESS. | ||
*/ | ||
static inline int ompi_grequest_internal_free(ompi_grequest_t* greq) | ||
static inline int ompi_grequest_invoke_free(ompi_grequest_t* greq) | ||
{ | ||
int rc = MPI_SUCCESS; | ||
int rc = OMPI_SUCCESS; | ||
MPI_Fint ierr; | ||
|
||
if (NULL != greq->greq_free.c_free) { | ||
if (greq->greq_funcs_are_c) { | ||
rc = greq->greq_free.c_free(greq->greq_state); | ||
} else { | ||
greq->greq_free.f_free((MPI_Aint*)greq->greq_state, &ierr); | ||
rc = OMPI_FINT_2_INT(ierr); | ||
} | ||
/* We were already putting query_fn()'s return value into | ||
* status.MPI_ERROR but for MPI_{Wait,Test}*. If there's a | ||
* free callback to invoke, the standard says to use the | ||
* return value from free_fn() callback, too. | ||
*/ | ||
if (greq->greq_funcs_are_c) { | ||
greq->greq_base.req_status.MPI_ERROR = | ||
greq->greq_free.c_free(greq->greq_state); | ||
} else { | ||
MPI_Fint ierr; | ||
greq->greq_free.f_free((MPI_Aint*)greq->greq_state, &ierr); | ||
greq->greq_base.req_status.MPI_ERROR = OMPI_FINT_2_INT(ierr); | ||
if (OMPI_SUCCESS != rc) { | ||
greq->greq_base.req_status.MPI_ERROR = rc; | ||
} | ||
rc = greq->greq_base.req_status.MPI_ERROR; | ||
} | ||
return rc; | ||
} | ||
|
||
|
||
/* | ||
* Internal function to dispatch the call to the user provided free_fn | ||
* for generalized requests. The freeing code executes as soon as both | ||
* wait/free and complete have occured. | ||
* @return The return value of the user specified callback or MPI_SUCCESS. | ||
*/ | ||
static inline int ompi_grequest_internal_free(ompi_grequest_t* greq) | ||
{ | ||
int rc = OMPI_SUCCESS; | ||
|
||
if (REQUEST_COMPLETE(&greq->greq_base) && greq->greq_user_freed) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we only invoke free if the user called There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @devreal Please read the large comment below this function, startin with MPI has some weird semantics with respect to generalized requests .... And no, completion should not trigger free by itself, completion is sort of flagging the thing as done, such that the progress engine running in another thread knows that the thing is completed. What makes things convoluted is that a user could |
||
rc = ompi_grequest_invoke_free(greq); | ||
/* The free_fn() callback should be invoked only once. */ | ||
if (NULL != greq->greq_free.c_free) | ||
greq->greq_free.c_free = NULL; | ||
} | ||
return rc; | ||
} | ||
|
||
/* | ||
* See the comment in the grequest destructor for the weird semantics | ||
* here. If the request has been marked complete via a call to | ||
|
@@ -66,14 +88,10 @@ static int ompi_grequest_free(ompi_request_t** req) | |
ompi_grequest_t* greq = (ompi_grequest_t*)*req; | ||
int rc = OMPI_SUCCESS; | ||
|
||
if( greq->greq_user_freed ) { | ||
return OMPI_ERR_OUT_OF_RESOURCE; | ||
} | ||
greq->greq_user_freed = true; | ||
if( REQUEST_COMPLETE(*req) ) { | ||
rc = ompi_grequest_internal_free(greq); | ||
} | ||
if (OMPI_SUCCESS == rc ) { | ||
rc = ompi_grequest_internal_free(greq); | ||
|
||
if (OMPI_SUCCESS == rc) { | ||
OBJ_RELEASE(*req); | ||
*req = MPI_REQUEST_NULL; | ||
} | ||
|
@@ -180,7 +198,7 @@ int ompi_grequest_start( | |
ompi_request_t** request) | ||
{ | ||
ompi_grequest_t *greq = OBJ_NEW(ompi_grequest_t); | ||
if(greq == NULL) { | ||
if (greq == NULL) { | ||
return OMPI_ERR_OUT_OF_RESOURCE; | ||
} | ||
/* We call RETAIN here specifically to increase the refcount to 2. | ||
|
@@ -211,13 +229,14 @@ int ompi_grequest_start( | |
int ompi_grequest_complete(ompi_request_t *req) | ||
{ | ||
ompi_grequest_t* greq = (ompi_grequest_t*)req; | ||
bool greq_release = !REQUEST_COMPLETE(req); | ||
int rc; | ||
|
||
rc = ompi_request_complete(req, true); | ||
if( greq->greq_user_freed ) { | ||
if (OMPI_SUCCESS == rc && greq_release) { | ||
rc = ompi_grequest_internal_free(greq); | ||
OBJ_RELEASE(req); | ||
} | ||
OBJ_RELEASE(req); | ||
return rc; | ||
} | ||
|
||
|
@@ -237,6 +256,11 @@ int ompi_grequest_invoke_query(ompi_request_t *request, | |
int rc = OMPI_SUCCESS; | ||
ompi_grequest_t *g = (ompi_grequest_t*) request; | ||
|
||
/* MPI mandates that query_fn must be called after the request is | ||
* completed. Make sure the caller does not break the contract. | ||
*/ | ||
assert( REQUEST_COMPLETE(request) ); | ||
|
||
/* MPI-3 mandates that the return value from the query function | ||
* (i.e., the int return value from the C function or the ierr | ||
* argument from the Fortran function) must be returned to the | ||
|
@@ -268,9 +292,8 @@ int ompi_grequest_invoke_query(ompi_request_t *request, | |
rc = OMPI_FINT_2_INT(ierr); | ||
} | ||
} | ||
if( MPI_SUCCESS != rc ) { | ||
if (OMPI_SUCCESS != rc) { | ||
status->MPI_ERROR = rc; | ||
} | ||
return rc; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -533,7 +533,7 @@ static inline int ompi_request_complete(ompi_request_t* request, bool with_signa | |
|
||
ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete, | ||
REQUEST_COMPLETED); | ||
if( REQUEST_PENDING != tmp_sync ) { | ||
if (REQUEST_PENDING != tmp_sync && REQUEST_COMPLETED != tmp_sync) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that a core premise of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
IMHO, it is probably easier to just allow multiple |
||
wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR); | ||
} | ||
} else { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did
ompi_request_free(request)
already set this?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the generic user-level
MPI_Request_free()
for all kinds of requests, not only generalized requests. I'm not so expert with the Open MPI implementation to 100% sureompi_request_free()
will set the status error field on errors. Without that guarantee, my use ofompi_errhandler_request_invoke()
may not work as I intended. Note thatompi_errhandler_request_invoke()
has the side-effect of cleaning up the request and deallocating the object. Sorry, it's a bit complicated, but I had to work within the constraints of what I was given.For example, look at
ompi_comm_request_free()
... I don't see that routine setting the status error field. Therefore, I think being overzealous here is justified. IMHO, this line is a good one and should stay. Otherwise, all internal request types used in Open MPI (and future ones that may be implemented) should be audited and eventually fixed to make sure a failing free sets the error field.