From e843553d0341c4edd880f5ad40da8211669348bd Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Wed, 6 Jan 2016 19:05:24 -0800 Subject: [PATCH] Don't hold mutex until release cv in cv_wait If a thread is holding mutex when doing cv_destroy, it might end up waiting a thread in cv_wait. The waiter would wake up trying to aquire the same mutex and cause deadlock. We solve this by move the mutex_enter to the bottom of cv_wait, so that the waiter will release the cv first, allowing cv_destroy to succeed and have a chance to free the mutex. This would create race condition on the cv_mutex. We use xchg to set and check it to ensure we won't be harmed by the race. This would result in the cv_mutex debugging becomes best-effort. Also, the change reveals a race, which was unlikely before, where we call mutex_destroy while test threads are still holding the mutex. We use kthread_stop to make sure the threads are exit before mutex_destroy. Signed-off-by: Chunwei Chen Signed-off-by: Brian Behlendorf Signed-off-by: Tim Chase Issue zfsonlinux/zfs#4166 Issue zfsonlinux/zfs#4106 --- module/spl/spl-condvar.c | 55 ++++++++++++++++++++++++++---------- module/splat/splat-condvar.c | 30 ++++++++++++++++++++ 2 files changed, 70 insertions(+), 15 deletions(-) diff --git a/module/spl/spl-condvar.c b/module/spl/spl-condvar.c index c3467a56e..c420d18ca 100644 --- a/module/spl/spl-condvar.c +++ b/module/spl/spl-condvar.c @@ -80,6 +80,7 @@ static void cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io) { DEFINE_WAIT(wait); + kmutex_t *m; ASSERT(cvp); ASSERT(mp); @@ -87,11 +88,11 @@ cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io) ASSERT(mutex_owned(mp)); atomic_inc(&cvp->cv_refs); - if (cvp->cv_mutex == NULL) - cvp->cv_mutex = mp; - + m = ACCESS_ONCE(cvp->cv_mutex); + if (!m) + m = xchg(&cvp->cv_mutex, mp); /* Ensure the same mutex is used by all callers */ - ASSERT(cvp->cv_mutex == mp); + ASSERT(m == NULL || m == mp); prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); atomic_inc(&cvp->cv_waiters); @@ -106,16 +107,25 @@ cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io) io_schedule(); else schedule(); - mutex_enter(mp); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { + /* + * This is set without any lock, so it's racy. But this is + * just for debug anyway, so make it best-effort + */ cvp->cv_mutex = NULL; wake_up(&cvp->cv_destroy); } finish_wait(&cvp->cv_event, &wait); atomic_dec(&cvp->cv_refs); + + /* + * Hold mutex after we release the cvp, otherwise we could dead lock + * with a thread holding the mutex and call cv_destroy. + */ + mutex_enter(mp); } void @@ -148,6 +158,7 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, int state) { DEFINE_WAIT(wait); + kmutex_t *m; clock_t time_left; ASSERT(cvp); @@ -156,15 +167,16 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, ASSERT(mutex_owned(mp)); atomic_inc(&cvp->cv_refs); - if (cvp->cv_mutex == NULL) - cvp->cv_mutex = mp; - + m = ACCESS_ONCE(cvp->cv_mutex); + if (!m) + m = xchg(&cvp->cv_mutex, mp); /* Ensure the same mutex is used by all callers */ - ASSERT(cvp->cv_mutex == mp); + ASSERT(m == NULL || m == mp); /* XXX - Does not handle jiffie wrap properly */ time_left = expire_time - jiffies; if (time_left <= 0) { + /* XXX - doesn't reset cv_mutex */ atomic_dec(&cvp->cv_refs); return (-1); } @@ -179,10 +191,13 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, */ mutex_exit(mp); time_left = schedule_timeout(time_left); - mutex_enter(mp); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { + /* + * This is set without any lock, so it's racy. But this is + * just for debug anyway, so make it best-effort + */ cvp->cv_mutex = NULL; wake_up(&cvp->cv_destroy); } @@ -190,6 +205,11 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, finish_wait(&cvp->cv_event, &wait); atomic_dec(&cvp->cv_refs); + /* + * Hold mutex after we release the cvp, otherwise we could dead lock + * with a thread holding the mutex and call cv_destroy. + */ + mutex_enter(mp); return (time_left > 0 ? time_left : -1); } @@ -216,6 +236,7 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, int state) { DEFINE_WAIT(wait); + kmutex_t *m; hrtime_t time_left, now; unsigned long time_left_us; @@ -225,11 +246,11 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, ASSERT(mutex_owned(mp)); atomic_inc(&cvp->cv_refs); - if (cvp->cv_mutex == NULL) - cvp->cv_mutex = mp; - + m = ACCESS_ONCE(cvp->cv_mutex); + if (!m) + m = xchg(&cvp->cv_mutex, mp); /* Ensure the same mutex is used by all callers */ - ASSERT(cvp->cv_mutex == mp); + ASSERT(m == NULL || m == mp); now = gethrtime(); time_left = expire_time - now; @@ -253,10 +274,13 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, * interrupts */ usleep_range(time_left_us, time_left_us + 100); - mutex_enter(mp); /* No more waiters a different mutex could be used */ if (atomic_dec_and_test(&cvp->cv_waiters)) { + /* + * This is set without any lock, so it's racy. But this is + * just for debug anyway, so make it best-effort + */ cvp->cv_mutex = NULL; wake_up(&cvp->cv_destroy); } @@ -264,6 +288,7 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, finish_wait(&cvp->cv_event, &wait); atomic_dec(&cvp->cv_refs); + mutex_enter(mp); time_left = expire_time - gethrtime(); return (time_left > 0 ? time_left : -1); } diff --git a/module/splat/splat-condvar.c b/module/splat/splat-condvar.c index af40a7258..bdbaf79c8 100644 --- a/module/splat/splat-condvar.c +++ b/module/splat/splat-condvar.c @@ -88,6 +88,9 @@ splat_condvar_test12_thread(void *arg) ct->ct_thread->comm, atomic_read(&cv->cv_condvar.cv_waiters)); mutex_exit(&cv->cv_mtx); + /* wait for main thread reap us */ + while (!kthread_should_stop()) + schedule(); return 0; } @@ -151,6 +154,12 @@ splat_condvar_test1(struct file *file, void *arg) /* Wake everything for the failure case */ cv_broadcast(&cv.cv_condvar); cv_destroy(&cv.cv_condvar); + + /* wait for threads to exit */ + for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { + if (!IS_ERR(ct[i].ct_thread)) + kthread_stop(ct[i].ct_thread); + } mutex_destroy(&cv.cv_mtx); return rc; @@ -199,6 +208,12 @@ splat_condvar_test2(struct file *file, void *arg) /* Wake everything for the failure case */ cv_destroy(&cv.cv_condvar); + + /* wait for threads to exit */ + for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { + if (!IS_ERR(ct[i].ct_thread)) + kthread_stop(ct[i].ct_thread); + } mutex_destroy(&cv.cv_mtx); return rc; @@ -234,6 +249,9 @@ splat_condvar_test34_thread(void *arg) mutex_exit(&cv->cv_mtx); + /* wait for main thread reap us */ + while (!kthread_should_stop()) + schedule(); return 0; } @@ -302,6 +320,12 @@ splat_condvar_test3(struct file *file, void *arg) /* Wake everything for the failure case */ cv_broadcast(&cv.cv_condvar); cv_destroy(&cv.cv_condvar); + + /* wait for threads to exit */ + for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { + if (!IS_ERR(ct[i].ct_thread)) + kthread_stop(ct[i].ct_thread); + } mutex_destroy(&cv.cv_mtx); return rc; @@ -372,6 +396,12 @@ splat_condvar_test4(struct file *file, void *arg) /* Wake everything for the failure case */ cv_broadcast(&cv.cv_condvar); cv_destroy(&cv.cv_condvar); + + /* wait for threads to exit */ + for (i = 0; i < SPLAT_CONDVAR_TEST_COUNT; i++) { + if (!IS_ERR(ct[i].ct_thread)) + kthread_stop(ct[i].ct_thread); + } mutex_destroy(&cv.cv_mtx); return rc;