From c79d5e4f33d95070f20ac950f8131fe930ae802b Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Fri, 18 Jul 2025 08:45:13 -0700 Subject: [PATCH] Define sops->free_inode() to prevent use-after-free during lookup On Linux, when doing path lookup with LOOKUP_RCU, dentry and inode can be dereferenced without refcounts and locks. For this reason, dentry and inode must only be freed after RCU grace period. However, zfs currently frees inode in zfs_inode_destroy synchronously and we can't use GPL-only call_rcu() in zfs directly. Fortunately, on Linux 5.2 and after, if we define sops->free_inode(), the kernel will do call_rcu() for us. This issue may be triggered more easily with init_on_free=1 boot parameter: BUG: kernel NULL pointer dereference, address: 0000000000000020 RIP: 0010:selinux_inode_permission+0x10e/0x1c0 Call Trace: ? show_trace_log_lvl+0x1be/0x2d9 ? show_trace_log_lvl+0x1be/0x2d9 ? show_trace_log_lvl+0x1be/0x2d9 ? security_inode_permission+0x37/0x60 ? __die_body.cold+0x8/0xd ? no_context+0x113/0x220 ? exc_page_fault+0x6d/0x130 ? asm_exc_page_fault+0x1e/0x30 ? selinux_inode_permission+0x10e/0x1c0 security_inode_permission+0x37/0x60 link_path_walk.part.0.constprop.0+0xb5/0x360 ? path_init+0x27d/0x3c0 path_lookupat+0x3e/0x1a0 filename_lookup+0xc0/0x1d0 ? __check_object_size.part.0+0x123/0x150 ? strncpy_from_user+0x4e/0x130 ? getname_flags.part.0+0x4b/0x1c0 vfs_statx+0x72/0x120 ? ioctl_has_perm.constprop.0.isra.0+0xbd/0x120 __do_sys_newlstat+0x39/0x70 ? __x64_sys_ioctl+0x8d/0xd0 do_syscall_64+0x30/0x40 entry_SYSCALL_64_after_hwframe+0x62/0xc7 Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Reviewed-by: Rob Norris Signed-off-by: Chunwei Chen Co-authored-by: Chunwei Chen Closes #17546 --- config/kernel-free-inode.m4 | 24 +++++++++++++++++++++++ config/kernel.m4 | 2 ++ include/os/linux/zfs/sys/zfs_znode_impl.h | 1 + module/os/linux/zfs/zfs_znode_os.c | 17 ++++++++++++++-- module/os/linux/zfs/zpl_super.c | 12 ++++++++++++ 5 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 config/kernel-free-inode.m4 diff --git a/config/kernel-free-inode.m4 b/config/kernel-free-inode.m4 new file mode 100644 index 000000000..baa1c3484 --- /dev/null +++ b/config/kernel-free-inode.m4 @@ -0,0 +1,24 @@ +dnl # +dnl # Linux 5.2 API change +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE], [ + ZFS_LINUX_TEST_SRC([super_operations_free_inode], [ + #include + + static void free_inode(struct inode *) { } + + static struct super_operations sops __attribute__ ((unused)) = { + .free_inode = free_inode, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_SOPS_FREE_INODE], [ + AC_MSG_CHECKING([whether sops->free_inode() exists]) + ZFS_LINUX_TEST_RESULT([super_operations_free_inode], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_SOPS_FREE_INODE, 1, [sops->free_inode() exists]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index c99aed357..c5482da64 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -132,6 +132,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_PIN_USER_PAGES ZFS_AC_KERNEL_SRC_TIMER ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR + ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE @@ -248,6 +249,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_PIN_USER_PAGES ZFS_AC_KERNEL_TIMER ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR + ZFS_AC_KERNEL_SOPS_FREE_INODE case "$host_cpu" in powerpc*) ZFS_AC_KERNEL_CPU_HAS_FEATURE diff --git a/include/os/linux/zfs/sys/zfs_znode_impl.h b/include/os/linux/zfs/sys/zfs_znode_impl.h index b38847b20..6a77e40ab 100644 --- a/include/os/linux/zfs/sys/zfs_znode_impl.h +++ b/include/os/linux/zfs/sys/zfs_znode_impl.h @@ -157,6 +157,7 @@ struct znode; extern int zfs_sync(struct super_block *, int, cred_t *); extern int zfs_inode_alloc(struct super_block *, struct inode **ip); +extern void zfs_inode_free(struct inode *); extern void zfs_inode_destroy(struct inode *); extern void zfs_mark_inode_dirty(struct inode *); extern boolean_t zfs_relatime_need_update(const struct inode *); diff --git a/module/os/linux/zfs/zfs_znode_os.c b/module/os/linux/zfs/zfs_znode_os.c index 607b3995c..7b28f2640 100644 --- a/module/os/linux/zfs/zfs_znode_os.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -371,6 +371,12 @@ zfs_inode_alloc(struct super_block *sb, struct inode **ip) return (0); } +void +zfs_inode_free(struct inode *ip) +{ + kmem_cache_free(znode_cache, ITOZ(ip)); +} + /* * Called in multiple places when an inode should be destroyed. */ @@ -395,8 +401,15 @@ zfs_inode_destroy(struct inode *ip) nvlist_free(zp->z_xattr_cached); zp->z_xattr_cached = NULL; } - - kmem_cache_free(znode_cache, zp); +#ifndef HAVE_SOPS_FREE_INODE + /* + * inode needs to be freed in RCU callback. If we have + * super_operations->free_inode, Linux kernel will do call_rcu + * for us. But if we don't have it, since call_rcu is GPL-only + * symbol, we can only free synchronously and accept the risk. + */ + zfs_inode_free(ip); +#endif } static void diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index a682bfd33..94dcdd0b8 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -45,6 +45,15 @@ zpl_inode_alloc(struct super_block *sb) return (ip); } +#ifdef HAVE_SOPS_FREE_INODE +static void +zpl_inode_free(struct inode *ip) +{ + ASSERT(atomic_read(&ip->i_count) == 0); + zfs_inode_free(ip); +} +#endif + static void zpl_inode_destroy(struct inode *ip) { @@ -455,6 +464,9 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg) const struct super_operations zpl_super_operations = { .alloc_inode = zpl_inode_alloc, +#ifdef HAVE_SOPS_FREE_INODE + .free_inode = zpl_inode_free, +#endif .destroy_inode = zpl_inode_destroy, .dirty_inode = zpl_dirty_inode, .write_inode = NULL,