From a3c1eb77721a0d511b4fe7111bb2314686570c4b Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Fri, 19 Dec 2014 11:31:59 +0800 Subject: [PATCH] mutex: force serialization on mutex_exit() to fix races It is known that mutexes in Linux are not safe when using them to synchronize the freeing of object in which the mutex is embedded: http://lwn.net/Articles/575477/ The known places in ZFS which are suspected to suffer from the race condition are zio->io_lock and dbuf->db_mtx. * zio uses zio->io_lock and zio->io_cv to synchronize freeing between zio_wait() and zio_done(). * dbuf uses dbuf->db_mtx to protect reference counting. This patch fixes this kind of race by forcing serialization on mutex_exit() with a spin lock, making the mutex safe by sacrificing a bit of performance and memory overhead. This issue most commonly manifests itself as a deadlock in the zio pipeline caused by a process spinning on the damaged mutex. Similar deadlocks have been reported for the dbuf->db_mtx mutex. And it can also cause a NULL dereference or bad paging request under the right circumstances. This issue any many like it are linked off the zfsonlinux/zfs#2523 issue. Specifically this fix resolves at least the following outstanding issues: zfsonlinux/zfs#401 zfsonlinux/zfs#2523 zfsonlinux/zfs#2679 zfsonlinux/zfs#2684 zfsonlinux/zfs#2704 zfsonlinux/zfs#2708 zfsonlinux/zfs#2517 zfsonlinux/zfs#2827 zfsonlinux/zfs#2850 zfsonlinux/zfs#2891 zfsonlinux/zfs#2897 zfsonlinux/zfs#2247 zfsonlinux/zfs#2939 Signed-off-by: Chunwei Chen Signed-off-by: Brian Behlendorf Signed-off-by: Richard Yao Closes #421 --- include/sys/mutex.h | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/include/sys/mutex.h b/include/sys/mutex.h index d946ff366..31497f6a1 100644 --- a/include/sys/mutex.h +++ b/include/sys/mutex.h @@ -44,6 +44,7 @@ typedef enum { */ typedef struct { struct mutex m; + spinlock_t m_lock; /* used for serializing mutex_exit */ } kmutex_t; static inline kthread_t * @@ -70,6 +71,7 @@ mutex_owner(kmutex_t *mp) ASSERT(type == MUTEX_DEFAULT); \ \ __mutex_init(&(mp)->m, #mp, &__key); \ + spin_lock_init(&(mp)->m_lock); \ }) #undef mutex_destroy @@ -84,12 +86,37 @@ mutex_owner(kmutex_t *mp) ASSERT3P(mutex_owner(mp), !=, current); \ mutex_lock(&(mp)->m); \ }) -#define mutex_exit(mp) mutex_unlock(&(mp)->m) +/* + * The reason for the spinlock: + * + * The Linux mutex is designed with a fast-path/slow-path design such that it + * does not guarantee serialization upon itself, allowing a race where latter + * acquirers finish mutex_unlock before former ones. + * + * The race renders it unsafe to be used for serializing the freeing of an + * object in which the mutex is embedded, where the latter acquirer could go + * on to free the object while the former one is still doing mutex_unlock and + * causing memory corruption. + * + * However, there are many places in ZFS where the mutex is used for + * serializing object freeing, and the code is shared among other OSes without + * this issue. Thus, we need the spinlock to force the serialization on + * mutex_exit(). + * + * See http://lwn.net/Articles/575477/ for the information about the race. + */ +#define mutex_exit(mp) \ +({ \ + spin_lock(&(mp)->m_lock); \ + mutex_unlock(&(mp)->m); \ + spin_unlock(&(mp)->m_lock); \ +}) #else /* HAVE_MUTEX_OWNER */ typedef struct { struct mutex m_mutex; + spinlock_t m_lock; kthread_t *m_owner; } kmutex_t; @@ -125,6 +152,7 @@ spl_mutex_clear_owner(kmutex_t *mp) ASSERT(type == MUTEX_DEFAULT); \ \ __mutex_init(MUTEX(mp), #mp, &__key); \ + spin_lock_init(&(mp)->m_lock); \ spl_mutex_clear_owner(mp); \ }) @@ -153,8 +181,10 @@ spl_mutex_clear_owner(kmutex_t *mp) #define mutex_exit(mp) \ ({ \ + spin_lock(&(mp)->m_lock); \ spl_mutex_clear_owner(mp); \ mutex_unlock(MUTEX(mp)); \ + spin_unlock(&(mp)->m_lock); \ }) #endif /* HAVE_MUTEX_OWNER */