Skip to content

Commit

Permalink
sessions: temporarily disable fboxes
Browse files Browse the repository at this point in the history
The way fboxes work has issues for the sessions implementation,
in particular the session finalize approach.

Without this temporary fix, if there is no fully synchronizing call
prior to calling session_finalize, there are cases where a process may be probing its fast
mailboxes for processes that are tearing down these fboxes.  That results in segfaults and
SIGBUS problems.

The fast box mechanism will need to be supplemented with some kind of shutdown mechanism
that will tell the owner of the fboxes when it's okay to actually tear them down.

In the interest of making progress using the sessions prototype with applications, shut
down the fbox process for the prototype and return to coming up with a real fix at a later
date.

relates to #3

drop use of MPI_Flag

The Sessions proposal currently being read at the MPI Forum has dropped MPI_Flags.
The info object passed to MPI_Session_init is now used to specify the desired
thread support level.

Signed-off-by: Howard Pritchard <hppritcha@gmail.com>
  • Loading branch information
hppritcha committed May 20, 2021
1 parent c226352 commit e8cb216
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 33 deletions.
5 changes: 5 additions & 0 deletions ompi/communicator/comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,11 @@ int ompi_comm_create_from_group (ompi_group_t *group, const char *tag, opal_info
snprintf(newcomp->c_name, MPI_MAX_OBJECT_NAME, "MPI COMM %s FROM GROUP",
ompi_comm_print_cid (newcomp));

newcomp->super.s_info = OBJ_NEW(opal_info_t);
if (NULL == newcomp->super.s_info) {
return OMPI_ERR_OUT_OF_RESOURCE;
}

/* NTH: HACK IN SLEEPY STUFF */
{
opal_info_entry_t *info_entry;
Expand Down
6 changes: 2 additions & 4 deletions ompi/include/mpi.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -485,8 +485,6 @@ typedef int (MPI_Grequest_query_function)(void *, MPI_Status *);
typedef int (MPI_Grequest_free_function)(void *);
typedef int (MPI_Grequest_cancel_function)(void *, int);

typedef unsigned long MPI_Flags;

/*
* Deprecated typedefs. Usage is discouraged, as these may be deleted
* in future versions of the MPI Standard.
Expand Down Expand Up @@ -1854,7 +1852,7 @@ OMPI_DECLSPEC int MPI_Session_get_num_psets (MPI_Session session, int *npset_na
OMPI_DECLSPEC int MPI_Session_get_nth_pset (MPI_Session session, int n, int len, char *pset_name);
OMPI_DECLSPEC int MPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used);
OMPI_DECLSPEC int MPI_Session_get_psetlen (MPI_Session session, int n, int *pset_name_len);
OMPI_DECLSPEC int MPI_Session_init (MPI_Flags *flags, MPI_Info info, MPI_Errhandler errhandler,
OMPI_DECLSPEC int MPI_Session_init (MPI_Info info, MPI_Errhandler errhandler,
MPI_Session *session);
OMPI_DECLSPEC MPI_Session MPI_Session_f2c (MPI_Fint session);
OMPI_DECLSPEC int MPI_Session_set_attr (MPI_Session session, int session_keyval, void *attribute_val);
Expand Down Expand Up @@ -2582,7 +2580,7 @@ OMPI_DECLSPEC int PMPI_Session_get_num_psets (MPI_Session session, int *npset_n
OMPI_DECLSPEC int PMPI_Session_get_nth_pset (MPI_Session session, int n, int len, char *pset_name);
OMPI_DECLSPEC int PMPI_Session_get_pset_info (MPI_Session session, const char *pset_name, MPI_Info *info_used);
OMPI_DECLSPEC int PMPI_Session_get_psetlen (MPI_Session session, int n, int *pset_name_len);
OMPI_DECLSPEC int PMPI_Session_init (MPI_Flags *flags, MPI_Info info, MPI_Errhandler errhandler,
OMPI_DECLSPEC int PMPI_Session_init (MPI_Info info, MPI_Errhandler errhandler,
MPI_Session *session);
OMPI_DECLSPEC MPI_Session PMPI_Session_f2c (MPI_Fint session);
OMPI_DECLSPEC int PMPI_Session_set_attr (MPI_Session session, int session_keyval, void *attribute_val);
Expand Down
4 changes: 2 additions & 2 deletions ompi/instance/instance.c
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ static int ompi_mpi_instance_init_common (void)
return OMPI_SUCCESS;
}

int ompi_mpi_instance_init (MPI_Flags *flags, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance)
int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance)
{
ompi_instance_t *new_instance;
int ret;
Expand All @@ -598,7 +598,7 @@ int ompi_mpi_instance_init (MPI_Flags *flags, opal_info_t *info, ompi_errhandler

/* If thread support was enabled, then set up OPAL to allow for them by default. This must be done
* early to prevent a race condition that can occur with orte_init(). */
if (*flags & MPI_FLAG_THREAD_CONCURRENT) {
if (ts_level == MPI_THREAD_MULTIPLE) {
opal_set_using_threads(true);
}

Expand Down
4 changes: 2 additions & 2 deletions ompi/instance/instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,11 @@ void ompi_mpi_instance_release (void);
/**
* @brief Create a new MPI instance
*
* @param[inout] flags instance flags (see mpi.h)
* @param[in] ts_level thread support level (see mpi.h)
* @param[in] info info object
* @param[in] errhandler errhandler to set on the instance
*/
OMPI_DECLSPEC int ompi_mpi_instance_init (MPI_Flags *flags, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance);
OMPI_DECLSPEC int ompi_mpi_instance_init (int ts_level, opal_info_t *info, ompi_errhandler_t *errhandler, ompi_instance_t **instance);

/**
* @brief Destroy an MPI instance and set it to MPI_SESSION_NULL
Expand Down
20 changes: 16 additions & 4 deletions ompi/mpi/c/session_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@
static const char FUNC_NAME[] = "MPI_Session_init";


int MPI_Session_init (MPI_Flags *flags, MPI_Info info, MPI_Errhandler errhandler, MPI_Session *session)
int MPI_Session_init (MPI_Info info, MPI_Errhandler errhandler, MPI_Session *session)
{
int rc;
int rc, flag;
int ts_level = MPI_THREAD_SINGLE; /* for now we default to thread single for OMPI sessions */
char info_value[MPI_MAX_INFO_VAL + 1];
const char ts_level_multi[] = "MPI_THREAD_MULTIPLE";

if ( MPI_PARAM_CHECK ) {
if (NULL == errhandler || NULL == flags || NULL == session) {
if (NULL == errhandler || NULL == session) {
return MPI_ERR_ARG;
}

Expand All @@ -39,7 +42,16 @@ int MPI_Session_init (MPI_Flags *flags, MPI_Info info, MPI_Errhandler errhandler
}
}

rc = ompi_mpi_instance_init (flags, &info->super, errhandler, session);
if (MPI_INFO_NULL != info) {
(void) ompi_info_get (info, "thread_support_level", MPI_MAX_INFO_VAL, info_value, &flag);
if (flag) {
if(strncmp(info_value, ts_level_multi, strlen(ts_level_multi)) == 0) {
ts_level = MPI_THREAD_MULTIPLE;
}
}
}

rc = ompi_mpi_instance_init (ts_level, &info->super, errhandler, session);
/* if an error occurred raise it on the null session */
OMPI_ERRHANDLER_RETURN (rc, MPI_SESSION_NULL, rc, FUNC_NAME);
}
21 changes: 1 addition & 20 deletions ompi/runtime/ompi_mpi_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -386,28 +386,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
}
}

/* Figure out the final MPI thread levels. If we were not
compiled for support for MPI threads, then don't allow
MPI_THREAD_MULTIPLE. Set this stuff up here early in the
process so that other components can make decisions based on
this value. */

ompi_mpi_thread_level(requested, provided);

MPI_Flags flags;
switch (*provided) {
case MPI_THREAD_SINGLE:
case MPI_THREAD_FUNNELED:
case MPI_THREAD_SERIALIZED:
flags = MPI_FLAG_THREAD_NONCONCURRENT_SINGLE;
break;
case MPI_THREAD_MULTIPLE:
flags = MPI_FLAG_THREAD_CONCURRENT;
break;
}


ret = ompi_mpi_instance_init (&flags, &ompi_mpi_info_null.info.super, MPI_ERRORS_ARE_FATAL, &ompi_mpi_instance_default);
ret = ompi_mpi_instance_init (*provided, &ompi_mpi_info_null.info.super, MPI_ERRORS_ARE_FATAL, &ompi_mpi_instance_default);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
error = "ompi_mpi_init: ompi_mpi_instance_init failed";
goto error;
Expand Down
4 changes: 3 additions & 1 deletion opal/mca/btl/sm/btl_sm_fifo.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,9 @@ static inline bool sm_fifo_write_ep(mca_btl_sm_hdr_t *hdr, struct mca_btl_base_e
opal_atomic_wmb();
return mca_btl_sm_fbox_sendi(ep, 0xfe, &rhdr, sizeof(rhdr), NULL, 0);
}
mca_btl_sm_try_fbox_setup(ep, hdr);
#if 0
mca_btl_smr_try_fbox_setup (ep, hdr);
#endif
hdr->next = SM_FIFO_FREE;
sm_fifo_write(ep->fifo, rhdr);

Expand Down

0 comments on commit e8cb216

Please sign in to comment.