From c22f5c1c55f8446f6217f6adeebdbe01102db625 Mon Sep 17 00:00:00 2001 From: Pavel Snajdr Date: Tue, 25 Mar 2025 23:20:16 +0100 Subject: [PATCH] Linux: Fix zfs_prune panics v2 (#17121) It turns out that the approach taken in the original version of the patch was wrong. So now we're taking an approach in line with how the kernel actually does it: when the sb is being torn down, access to it is serialized via the sb->s_umount rwsem, and only while that lock is held is it okay to work with s_flags. The other mistake I made was trying to make SB_ACTIVE work, but apparently the kernel checks the negative variant - the sb must not be SB_DYING, and it must be SB_BORN. Kernels pre-6.6 don't have SB_DYING, but check whether the sb is hashed instead. Signed-off-by: Pavel Snajdr Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf --- config/kernel-sb-dying.m4 | 19 +++++++++++++++++++ config/kernel.m4 | 2 ++ module/os/linux/zfs/zpl_super.c | 26 +++++++++++++++++--------- 3 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 config/kernel-sb-dying.m4 diff --git a/config/kernel-sb-dying.m4 b/config/kernel-sb-dying.m4 new file mode 100644 index 000000000..882f3e542 --- /dev/null +++ b/config/kernel-sb-dying.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # SB_DYING exists since Linux 6.6 +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_SB_DYING], [ + ZFS_LINUX_TEST_SRC([sb_dying], [ + #include <linux/fs.h> + ],[ + (void) SB_DYING; + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SB_DYING], [ + AC_MSG_CHECKING([whether SB_DYING is defined]) + ZFS_LINUX_TEST_RESULT([sb_dying], [ + AC_MSG_RESULT(yes) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index 9928ead1b..1716704de 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -73,6 +73,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FST_MOUNT + ZFS_AC_KERNEL_SRC_SB_DYING ZFS_AC_KERNEL_SRC_SET_NLINK ZFS_AC_KERNEL_SRC_SGET ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO @@ -184,6 +185,7 @@ 
AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FST_MOUNT + ZFS_AC_KERNEL_SB_DYING ZFS_AC_KERNEL_SET_NLINK ZFS_AC_KERNEL_SGET ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index 948fdf842..40c25e464 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -377,17 +377,25 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg) int objects = 0; /* - * deactivate_locked_super calls shrinker_free and only then - * sops->kill_sb cb, resulting in UAF on umount when trying to reach - * for the shrinker functions in zpl_prune_sb of in-umount dataset. - * Increment if s_active is not zero, but don't prune if it is - - * umount could be underway. + * Ensure the superblock is not in the process of being torn down. */ - if (atomic_inc_not_zero(&sb->s_active)) { - (void) -zfs_prune(sb, nr_to_scan, &objects); - atomic_dec(&sb->s_active); +#ifdef HAVE_SB_DYING + if (down_read_trylock(&sb->s_umount)) { + if (!(sb->s_flags & SB_DYING) && sb->s_root && + (sb->s_flags & SB_BORN)) { + (void) zfs_prune(sb, nr_to_scan, &objects); + } + up_read(&sb->s_umount); } - +#else + if (down_read_trylock(&sb->s_umount)) { + if (!hlist_unhashed(&sb->s_instances) && + sb->s_root && (sb->s_flags & SB_BORN)) { + (void) zfs_prune(sb, nr_to_scan, &objects); + } + up_read(&sb->s_umount); + } +#endif } const struct super_operations zpl_super_operations = {