Skip to content

Commit

Permalink
ompi request handling race condition fix (MT-case)
Browse files Browse the repository at this point in the history
Described in open-mpi#1813
  • Loading branch information
artpol84 committed Jun 25, 2016
1 parent dac9201 commit 65f222b
Showing 1 changed file with 44 additions and 6 deletions.
50 changes: 44 additions & 6 deletions opal/threads/wait_sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -13,6 +14,8 @@
*/
#include "opal/sys/atomic.h"
#include "opal/threads/condition.h"
#include "opal/constants.h"
#include "opal/prefetch.h"
#include <pthread.h>

BEGIN_C_DECLS
Expand All @@ -33,17 +36,29 @@ typedef struct ompi_wait_sync_t {

#define WAIT_SYNC_RELEASE(sync) \
if (opal_using_threads()) { \
pthread_mutex_lock(&(sync)->lock); \
pthread_cond_destroy(&(sync)->condition); \
pthread_mutex_unlock(&(sync)->lock); \
pthread_mutex_destroy(&(sync)->lock); \
}

#define WAIT_SYNC_SIGNAL(sync) \
#define WAIT_SYNC_LOCK(sync) \
if (opal_using_threads()) { \
pthread_mutex_lock(&(sync->lock)); \
pthread_cond_signal(&sync->condition); \
pthread_mutex_unlock(&(sync->lock)); \
pthread_mutex_lock(&((sync)->lock)); \
}

#define WAIT_SYNC_SIGNAL_UNLOCK(sync) \
if (opal_using_threads()) { \
pthread_cond_signal(&((sync)->condition)); \
pthread_mutex_unlock(&((sync)->lock)); \
}

#define WAIT_SYNC_UNLOCK(sync) \
if (opal_using_threads()) { \
pthread_mutex_unlock(&((sync)->lock)); \
}


OPAL_DECLSPEC int sync_wait_mt(ompi_wait_sync_t *sync);
static inline int sync_wait_st (ompi_wait_sync_t *sync)
{
Expand Down Expand Up @@ -75,16 +90,39 @@ static inline int sync_wait_st (ompi_wait_sync_t *sync)
*/
static inline void wait_sync_update(ompi_wait_sync_t *sync, int updates, int status)
{
/* Fast path: if we can decrement the sync->count without
* dropping it to 0 - just return
* Consider that there might be concurrent decrements
*/
if( OPAL_LIKELY(OPAL_SUCCESS == status) ) {
/* we know that our contribution is not yet there
* so we can safely check if the count will still be above 0
* after the change */
while( (sync->count - updates > 0) ){
int tmp = sync->count;
if( OPAL_ATOMIC_CMPSET_32(&sync->count, tmp, tmp - updates) ){
/* fastpath succeeds */
return;
}
}
}

/* Slow path */
WAIT_SYNC_LOCK(sync);

if( OPAL_LIKELY(OPAL_SUCCESS == status) ) {
if( 0 != (OPAL_THREAD_ADD32(&sync->count, -updates)) ) {
return;
goto unlock;
}
} else {
/* this is an error path so just use the atomic */
opal_atomic_swap_32 (&sync->count, 0);
sync->status = OPAL_ERROR;
}
WAIT_SYNC_SIGNAL(sync);
WAIT_SYNC_SIGNAL_UNLOCK(sync);
return;
unlock:
WAIT_SYNC_UNLOCK(sync);
}

END_C_DECLS

0 comments on commit 65f222b

Please sign in to comment.