mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 02:44:41 +03:00
Fix snapshot automount deadlock during concurrent zfs recv
zfsctl_snapshot_mount() holds z_teardown_lock(R) across call_usermodehelper(), which spawns a mount process that needs namespace_sem(W) via move_mount. Reading /proc/self/mountinfo holds namespace_sem(R) and needs z_teardown_lock(R) via zpl_show_devname. When zfs_suspend_fs (from zfs recv or zfs rollback) queues z_teardown_lock(W), the rrwlock blocks new readers, completing the deadlock cycle.

Fix by releasing z_teardown_lock(R) after gathering the dataset name and mount path, before any blocking operation. Everything after the release operates on local string copies or uses its own synchronization. The parent zfsvfs pointer remains valid because the caller holds a path reference to the automount trigger dentry.

Releasing the lock allows zfs_suspend_fs to proceed concurrently with the mount helper, so dmu_objset_hold in zpl_get_tree can transiently fail with ENOENT during the clone swap. In that case the mount helper fails, zfsctl_snapshot_mount() returns EISDIR, and the VFS falls back to the ctldir stub (an empty directory) until the next access retries the automount.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <alexander.motin@TrueNAS.com>
Reviewed-by: Rob Norris <robn@despairlabs.com>
Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
Closes #18415
This commit is contained in:
@@ -1201,8 +1201,10 @@ zfsctl_snapshot_mount(struct path *path, int flags)
|
||||
|
||||
error = zfsctl_snapshot_name(zfsvfs, dname(dentry),
|
||||
ZFS_MAX_DATASET_NAME_LEN, full_name);
|
||||
if (error)
|
||||
if (error) {
|
||||
zfs_exit(zfsvfs, FTAG);
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (is_current_chrooted() == 0) {
|
||||
/*
|
||||
@@ -1220,6 +1222,7 @@ zfsctl_snapshot_mount(struct path *path, int flags)
|
||||
error = get_root_path(&mnt_path, m, MAXPATHLEN);
|
||||
if (error != 0) {
|
||||
kmem_free(m, MAXPATHLEN);
|
||||
zfs_exit(zfsvfs, FTAG);
|
||||
goto error;
|
||||
}
|
||||
mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
|
||||
@@ -1252,6 +1255,33 @@ zfsctl_snapshot_mount(struct path *path, int flags)
|
||||
snprintf(options, 7, "%s",
|
||||
zfs_snapshot_no_setuid ? "nosuid" : "suid");
|
||||
|
||||
/*
|
||||
* Release z_teardown_lock before potentially blocking operations
|
||||
* (cv_wait for concurrent mounts, call_usermodehelper for the mount
|
||||
* helper). Holding z_teardown_lock(R) across call_usermodehelper
|
||||
* deadlocks with namespace_sem: the mount helper needs
|
||||
* namespace_sem(W) via move_mount, while /proc/self/mountinfo
|
||||
* readers hold namespace_sem(R) and need z_teardown_lock(R) via
|
||||
* zpl_show_devname. A concurrent zfs_suspend_fs queuing
|
||||
* z_teardown_lock(W) blocks new readers, completing the cycle.
|
||||
* See https://github.com/openzfs/zfs/issues/18409
|
||||
*
|
||||
* Releasing the lock allows zfs_suspend_fs to proceed during
|
||||
* the mount, so dmu_objset_hold in zpl_get_tree can transiently
|
||||
* fail with ENOENT during the clone swap. The mount helper
|
||||
* fails, this function returns EISDIR, and the VFS silently
|
||||
* falls back to the ctldir stub (empty directory). The caller
|
||||
* gets the stub inode instead of the real snapshot root until
|
||||
* the next access retries the automount.
|
||||
*
|
||||
* Safe because everything below operates on local string copies
|
||||
* (full_name, full_path) or uses its own synchronization
|
||||
* (zfs_snapshot_lock, se_mtx). The parent zfsvfs pointer
|
||||
* remains valid because we hold a path reference to the
|
||||
* automount trigger dentry.
|
||||
*/
|
||||
zfs_exit(zfsvfs, FTAG);
|
||||
|
||||
/*
|
||||
* Check if snapshot is already being mounted. If found, wait for
|
||||
* pending mount to complete before returning success.
|
||||
|
||||
Reference in New Issue
Block a user