From 08fbc0201a05807d26d21fc9f0f1bf1eeb9977d5 Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Wed, 6 Jan 2016 19:05:24 -0800 Subject: [PATCH] Don't hold mutex until release cv in cv_wait If a thread is holding the mutex when calling cv_destroy, it might end up waiting for a thread in cv_wait. The waiter would wake up trying to acquire the same mutex and cause a deadlock. We solve this by moving the mutex_enter to the bottom of cv_wait, so that the waiter will release the cv first, allowing cv_destroy to succeed and have a chance to free the mutex. This would create a race condition on the cv_mutex. We use xchg to set and check it to ensure we won't be harmed by the race. As a result, the cv_mutex debugging becomes best-effort. Signed-off-by: Chunwei Chen --- module/spl/spl-condvar.c | 49 ++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index c3467a56..acc2580f 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -80,6 +80,7 @@ static void cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io) { DEFINE_WAIT(wait); + kmutex_t *m; ASSERT(cvp); ASSERT(mp); @@ -87,11 +88,11 @@ cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io) ASSERT(mutex_owned(mp)); atomic_inc(&cvp->cv_refs); - if (cvp->cv_mutex == NULL) - cvp->cv_mutex = mp; - + m = ACCESS_ONCE(cvp->cv_mutex); + if (!m) + m = xchg(&cvp->cv_mutex, mp); /* Ensure the same mutex is used by all callers */ - ASSERT(cvp->cv_mutex == mp); + ASSERT(m == NULL || m == mp); prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); atomic_inc(&cvp->cv_waiters); @@ -106,16 +107,23 @@ cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io) io_schedule(); else schedule(); - mutex_enter(mp); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { + /* This is set without any lock, so it's racy. 
But this is + * just for debug anyway, so make it best-effort */ cvp->cv_mutex = NULL; wake_up(&cvp->cv_destroy); } finish_wait(&cvp->cv_event, &wait); atomic_dec(&cvp->cv_refs); + + /* + * Hold mutex after we release the cvp, otherwise we could dead lock + * with a thread holding the mutex and call cv_destroy. + */ + mutex_enter(mp); } void @@ -148,6 +156,7 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, int state) { DEFINE_WAIT(wait); + kmutex_t *m; clock_t time_left; ASSERT(cvp); @@ -156,15 +165,16 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, ASSERT(mutex_owned(mp)); atomic_inc(&cvp->cv_refs); - if (cvp->cv_mutex == NULL) - cvp->cv_mutex = mp; - + m = ACCESS_ONCE(cvp->cv_mutex); + if (!m) + m = xchg(&cvp->cv_mutex, mp); /* Ensure the same mutex is used by all callers */ - ASSERT(cvp->cv_mutex == mp); + ASSERT(m == NULL || m == mp); /* XXX - Does not handle jiffie wrap properly */ time_left = expire_time - jiffies; if (time_left <= 0) { + /* XXX - doesn't reset cv_mutex */ atomic_dec(&cvp->cv_refs); return (-1); } @@ -179,10 +189,11 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, */ mutex_exit(mp); time_left = schedule_timeout(time_left); - mutex_enter(mp); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { + /* This is set without any lock, so it's racy. But this is + * just for debug anyway, so make it best-effort */ cvp->cv_mutex = NULL; wake_up(&cvp->cv_destroy); } @@ -190,6 +201,11 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, finish_wait(&cvp->cv_event, &wait); atomic_dec(&cvp->cv_refs); + /* + * Hold mutex after we release the cvp, otherwise we could dead lock + * with a thread holding the mutex and call cv_destroy. + */ + mutex_enter(mp); return (time_left > 0 ? 
time_left : -1); } @@ -216,6 +232,7 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, int state) { DEFINE_WAIT(wait); + kmutex_t *m; hrtime_t time_left, now; unsigned long time_left_us; @@ -225,11 +242,11 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, ASSERT(mutex_owned(mp)); atomic_inc(&cvp->cv_refs); - if (cvp->cv_mutex == NULL) - cvp->cv_mutex = mp; - + m = ACCESS_ONCE(cvp->cv_mutex); + if (!m) + m = xchg(&cvp->cv_mutex, mp); /* Ensure the same mutex is used by all callers */ - ASSERT(cvp->cv_mutex == mp); + ASSERT(m == NULL || m == mp); now = gethrtime(); time_left = expire_time - now; @@ -253,10 +270,11 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, * interrupts */ usleep_range(time_left_us, time_left_us + 100); - mutex_enter(mp); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { + /* This is set without any lock, so it's racy. But this is + * just for debug anyway, so make it best-effort */ cvp->cv_mutex = NULL; wake_up(&cvp->cv_destroy); } @@ -264,6 +282,7 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, finish_wait(&cvp->cv_event, &wait); atomic_dec(&cvp->cv_refs); + mutex_enter(mp); time_left = expire_time - gethrtime(); return (time_left > 0 ? time_left : -1); }