cherry-pick lock-inversion patch for zvol_open
the changes to zvol_open added to 2.1.2 (for coping with kernel changes in 5.13) seem to have introduced a lock order inversion [0]. (noticed while reviewing the 2.0.6->2.0.7 changes (the patch was applied after 2.1.2 was already tagged) [0] https://github.com/openzfs/zfs/pull/12863 Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
This commit is contained in:
parent
64e59c5ce9
commit
531eac3304
212
debian/patches/0012-Fix-zvol_open-lock-inversion.patch
vendored
Normal file
212
debian/patches/0012-Fix-zvol_open-lock-inversion.patch
vendored
Normal file
@ -0,0 +1,212 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Date: Fri, 17 Dec 2021 09:52:13 -0800
|
||||
Subject: [PATCH] Fix zvol_open() lock inversion
|
||||
|
||||
When restructuring the zvol_open() logic for the Linux 5.13 kernel
|
||||
a lock inversion was accidentally introduced. In the updated code
|
||||
the spa_namespace_lock is now taken before the zv_suspend_lock
|
||||
allowing the following scenario to occur:
|
||||
|
||||
down_read <=== waiting for zv_suspend_lock
|
||||
zvol_open <=== holds spa_namespace_lock
|
||||
__blkdev_get
|
||||
blkdev_get_by_dev
|
||||
blkdev_open
|
||||
...
|
||||
|
||||
mutex_lock <== waiting for spa_namespace_lock
|
||||
spa_open_common
|
||||
spa_open
|
||||
dsl_pool_hold
|
||||
dmu_objset_hold_flags
|
||||
dmu_objset_hold
|
||||
dsl_prop_get
|
||||
dsl_prop_get_integer
|
||||
zvol_create_minor
|
||||
dmu_recv_end
|
||||
zfs_ioc_recv_impl <=== holds zv_suspend_lock via zvol_suspend()
|
||||
zfs_ioc_recv
|
||||
...
|
||||
|
||||
This commit resolves the issue by moving the acquisition of the
|
||||
spa_namespace_lock back to after the zv_suspend_lock which restores
|
||||
the original ordering.
|
||||
|
||||
Additionally, as part of this change the error exit paths were
|
||||
simplified where possible.
|
||||
|
||||
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
|
||||
Reviewed-by: Rich Ercolani <rincebrain@gmail.com>
|
||||
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
|
||||
Closes #12863
|
||||
(cherry picked from commit 8a02d01e85556bbe3a1c6947bc11b8ef028d4023)
|
||||
Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
|
||||
---
|
||||
module/os/linux/zfs/zvol_os.c | 121 ++++++++++++++++------------------
|
||||
1 file changed, 58 insertions(+), 63 deletions(-)
|
||||
|
||||
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
|
||||
index 44caadd58..69479b3f7 100644
|
||||
--- a/module/os/linux/zfs/zvol_os.c
|
||||
+++ b/module/os/linux/zfs/zvol_os.c
|
||||
@@ -496,8 +496,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
||||
{
|
||||
zvol_state_t *zv;
|
||||
int error = 0;
|
||||
- boolean_t drop_suspend = B_TRUE;
|
||||
- boolean_t drop_namespace = B_FALSE;
|
||||
+ boolean_t drop_suspend = B_FALSE;
|
||||
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||
hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms);
|
||||
hrtime_t start = gethrtime();
|
||||
@@ -517,7 +516,36 @@ retry:
|
||||
return (SET_ERROR(-ENXIO));
|
||||
}
|
||||
|
||||
- if (zv->zv_open_count == 0 && !mutex_owned(&spa_namespace_lock)) {
|
||||
+ mutex_enter(&zv->zv_state_lock);
|
||||
+ /*
|
||||
+ * Make sure zvol is not suspended during first open
|
||||
+ * (hold zv_suspend_lock) and respect proper lock acquisition
|
||||
+ * ordering - zv_suspend_lock before zv_state_lock
|
||||
+ */
|
||||
+ if (zv->zv_open_count == 0) {
|
||||
+ if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
||||
+ mutex_exit(&zv->zv_state_lock);
|
||||
+ rw_enter(&zv->zv_suspend_lock, RW_READER);
|
||||
+ mutex_enter(&zv->zv_state_lock);
|
||||
+ /* check to see if zv_suspend_lock is needed */
|
||||
+ if (zv->zv_open_count != 0) {
|
||||
+ rw_exit(&zv->zv_suspend_lock);
|
||||
+ } else {
|
||||
+ drop_suspend = B_TRUE;
|
||||
+ }
|
||||
+ } else {
|
||||
+ drop_suspend = B_TRUE;
|
||||
+ }
|
||||
+ }
|
||||
+ rw_exit(&zvol_state_lock);
|
||||
+
|
||||
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
||||
+
|
||||
+ if (zv->zv_open_count == 0) {
|
||||
+ boolean_t drop_namespace = B_FALSE;
|
||||
+
|
||||
+ ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
||||
+
|
||||
/*
|
||||
* In all other call paths the spa_namespace_lock is taken
|
||||
* before the bdev->bd_mutex lock. However, on open(2)
|
||||
@@ -542,84 +570,51 @@ retry:
|
||||
* the kernel so the only option is to return the error for
|
||||
* the caller to handle it.
|
||||
*/
|
||||
- if (!mutex_tryenter(&spa_namespace_lock)) {
|
||||
- rw_exit(&zvol_state_lock);
|
||||
+ if (!mutex_owned(&spa_namespace_lock)) {
|
||||
+ if (!mutex_tryenter(&spa_namespace_lock)) {
|
||||
+ mutex_exit(&zv->zv_state_lock);
|
||||
+ rw_exit(&zv->zv_suspend_lock);
|
||||
|
||||
#ifdef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||
- schedule();
|
||||
- return (SET_ERROR(-ERESTARTSYS));
|
||||
-#else
|
||||
- if ((gethrtime() - start) > timeout)
|
||||
+ schedule();
|
||||
return (SET_ERROR(-ERESTARTSYS));
|
||||
+#else
|
||||
+ if ((gethrtime() - start) > timeout)
|
||||
+ return (SET_ERROR(-ERESTARTSYS));
|
||||
|
||||
- schedule_timeout(MSEC_TO_TICK(10));
|
||||
- goto retry;
|
||||
+ schedule_timeout(MSEC_TO_TICK(10));
|
||||
+ goto retry;
|
||||
#endif
|
||||
- } else {
|
||||
- drop_namespace = B_TRUE;
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- mutex_enter(&zv->zv_state_lock);
|
||||
- /*
|
||||
- * make sure zvol is not suspended during first open
|
||||
- * (hold zv_suspend_lock) and respect proper lock acquisition
|
||||
- * ordering - zv_suspend_lock before zv_state_lock
|
||||
- */
|
||||
- if (zv->zv_open_count == 0) {
|
||||
- if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
|
||||
- mutex_exit(&zv->zv_state_lock);
|
||||
- rw_enter(&zv->zv_suspend_lock, RW_READER);
|
||||
- mutex_enter(&zv->zv_state_lock);
|
||||
- /* check to see if zv_suspend_lock is needed */
|
||||
- if (zv->zv_open_count != 0) {
|
||||
- rw_exit(&zv->zv_suspend_lock);
|
||||
- drop_suspend = B_FALSE;
|
||||
+ } else {
|
||||
+ drop_namespace = B_TRUE;
|
||||
}
|
||||
}
|
||||
- } else {
|
||||
- drop_suspend = B_FALSE;
|
||||
- }
|
||||
- rw_exit(&zvol_state_lock);
|
||||
-
|
||||
- ASSERT(MUTEX_HELD(&zv->zv_state_lock));
|
||||
|
||||
- if (zv->zv_open_count == 0) {
|
||||
- ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
|
||||
error = -zvol_first_open(zv, !(flag & FMODE_WRITE));
|
||||
- if (error)
|
||||
- goto out_mutex;
|
||||
- }
|
||||
|
||||
- if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
||||
- error = -EROFS;
|
||||
- goto out_open_count;
|
||||
+ if (drop_namespace)
|
||||
+ mutex_exit(&spa_namespace_lock);
|
||||
}
|
||||
|
||||
- zv->zv_open_count++;
|
||||
-
|
||||
- mutex_exit(&zv->zv_state_lock);
|
||||
- if (drop_namespace)
|
||||
- mutex_exit(&spa_namespace_lock);
|
||||
- if (drop_suspend)
|
||||
- rw_exit(&zv->zv_suspend_lock);
|
||||
-
|
||||
- zfs_check_media_change(bdev);
|
||||
-
|
||||
- return (0);
|
||||
+ if (error == 0) {
|
||||
+ if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
|
||||
+ if (zv->zv_open_count == 0)
|
||||
+ zvol_last_close(zv);
|
||||
|
||||
-out_open_count:
|
||||
- if (zv->zv_open_count == 0)
|
||||
- zvol_last_close(zv);
|
||||
+ error = SET_ERROR(-EROFS);
|
||||
+ } else {
|
||||
+ zv->zv_open_count++;
|
||||
+ }
|
||||
+ }
|
||||
|
||||
-out_mutex:
|
||||
mutex_exit(&zv->zv_state_lock);
|
||||
- if (drop_namespace)
|
||||
- mutex_exit(&spa_namespace_lock);
|
||||
if (drop_suspend)
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
|
||||
- return (SET_ERROR(error));
|
||||
+ if (error == 0)
|
||||
+ zfs_check_media_change(bdev);
|
||||
+
|
||||
+ return (error);
|
||||
}
|
||||
|
||||
static void
|
1
debian/patches/series
vendored
1
debian/patches/series
vendored
@ -9,3 +9,4 @@
|
||||
0009-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
|
||||
0010-arcstat-Fix-integer-division-with-python3.patch
|
||||
0011-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
|
||||
0012-Fix-zvol_open-lock-inversion.patch
|
||||
|
Loading…
Reference in New Issue
Block a user