From 07783588bcb513a3a1f4d995b5d4685a9cfc89e5 Mon Sep 17 00:00:00 2001 From: Boris Protopopov Date: Thu, 11 May 2017 16:40:33 -0400 Subject: [PATCH] Revert commit 1ee159f4 ("Fix lock order inversion with zvol_open()") as it did not account for the use of zvols as vdevs. Such use cases resulted in lock order inversion deadlocks involving spa_namespace_lock and bdev->bd_mutex. Signed-off-by: Boris Protopopov Signed-off-by: Brian Behlendorf Issue #6065 Issue #6134 --- module/zfs/zvol.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 6cd366602..3bf28e1d4 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1150,12 +1150,36 @@ static int zvol_first_open(zvol_state_t *zv) { objset_t *os; - int error; + int error, locked = 0; + + /* + * In all other cases the spa_namespace_lock is taken before the + * bdev->bd_mutex lock. But in this case the Linux __blkdev_get() + * function calls fops->open() with the bdev->bd_mutex lock held. + * This deadlock can be easily observed with zvols used as vdevs. + * + * To avoid a potential lock inversion deadlock we preemptively + * try to take the spa_namespace_lock(). Normally it will not + * be contended and this is safe because spa_open_common() handles + * the case where the caller already holds the spa_namespace_lock. + * + * When it is contended we risk a lock inversion if we were to + * block waiting for the lock. Luckily, the __blkdev_get() + * function allows us to return -ERESTARTSYS which will result in + * bdev->bd_mutex being dropped, reacquired, and fops->open() being + * called again. This process can be repeated safely until both + * locks are acquired. 
+ */ + if (!mutex_owned(&spa_namespace_lock)) { + locked = mutex_tryenter(&spa_namespace_lock); + if (!locked) + return (-SET_ERROR(ERESTARTSYS)); + } /* lie and say we're read-only */ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zv, &os); if (error) - return (SET_ERROR(-error)); + goto out_mutex; zv->zv_objset = os; @@ -1166,6 +1190,9 @@ zvol_first_open(zvol_state_t *zv) zv->zv_objset = NULL; } +out_mutex: + if (locked) + mutex_exit(&spa_namespace_lock); return (SET_ERROR(-error)); }