From 1ee159f423b5eb3c4646b0ba2dd0fb359503ba90 Mon Sep 17 00:00:00 2001 From: Boris Protopopov Date: Wed, 23 Sep 2015 12:34:51 -0400 Subject: [PATCH] Fix lock order inversion with zvol_open() zfsonlinux issue #3681 - lock order inversion between zvol_open() and dsl_pool_sync()...zvol_rename_minors() Remove trylock of spa_namespace_lock as it is no longer needed when zvol minor operations are performed in a separate context with no prior locking state; the spa_namespace_lock is no longer held when bdev->bd_mutex or zfs_state_lock might be taken in the code paths originating from the zvol minor operation callbacks. Signed-off-by: Boris Protopopov Signed-off-by: Brian Behlendorf Closes #3681 --- module/zfs/zvol.c | 65 +++++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 44 deletions(-) diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index ab4d3ceb7..ba482a474 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -957,56 +957,27 @@ zvol_first_open(zvol_state_t *zv) { objset_t *os; uint64_t volsize; - int locked = 0; int error; uint64_t ro; - /* - * In all other cases the spa_namespace_lock is taken before the - * bdev->bd_mutex lock. But in this case the Linux __blkdev_get() - * function calls fops->open() with the bdev->bd_mutex lock held. - * - * To avoid a potential lock inversion deadlock we preemptively - * try to take the spa_namespace_lock(). Normally it will not - * be contended and this is safe because spa_open_common() handles - * the case where the caller already holds the spa_namespace_lock. - * - * When it is contended we risk a lock inversion if we were to - * block waiting for the lock. Luckily, the __blkdev_get() - * function allows us to return -ERESTARTSYS which will result in - * bdev->bd_mutex being dropped, reacquired, and fops->open() being - * called again. This process can be repeated safely until both - * locks are acquired. - */ - if (!mutex_owned(&spa_namespace_lock)) { - locked = mutex_tryenter(&spa_namespace_lock); - if (!locked) - return (-SET_ERROR(ERESTARTSYS)); - } - - error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL); - if (error) - goto out_mutex; - /* lie and say we're read-only */ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os); if (error) - goto out_mutex; - - error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); - if (error) { - dmu_objset_disown(os, zvol_tag); - zv->zv_objset = NULL; - goto out_mutex; - } + return (SET_ERROR(-error)); zv->zv_objset = os; + + error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL); + if (error) + goto out_owned; + + error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); + if (error) + goto out_owned; + error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); - if (error) { - dmu_objset_disown(os, zvol_tag); - zv->zv_objset = NULL; - goto out_mutex; - } + if (error) + goto out_owned; set_capacity(zv->zv_disk, volsize >> 9); zv->zv_volsize = volsize; @@ -1021,9 +992,11 @@ zvol_first_open(zvol_state_t *zv) zv->zv_flags &= ~ZVOL_RDONLY; } -out_mutex: - if (locked) - mutex_exit(&spa_namespace_lock); +out_owned: + if (error) { + dmu_objset_disown(os, zvol_tag); + zv->zv_objset = NULL; + } return (SET_ERROR(-error)); } @@ -1526,6 +1499,8 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg) { const char *name = (const char *)arg; + ASSERT0(MUTEX_HELD(&spa_namespace_lock)); + /* skip the designated dataset */ if (name && strcmp(dsname, name) == 0) return (0); @@ -1550,6 +1525,8 @@ zvol_create_minors_cb(const char *dsname, void *arg) uint64_t snapdev; int error; + ASSERT0(MUTEX_HELD(&spa_namespace_lock)); + error = dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL); if (error) return (0);