mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-09-15 21:56:21 +03:00
Define sops->free_inode() to prevent use-after-free during lookup
On Linux, when doing path lookup with LOOKUP_RCU, dentry and inode can be dereferenced without refcounts and locks. For this reason, dentry and inode must only be freed after RCU grace period. However, zfs currently frees inode in zfs_inode_destroy synchronously and we can't use GPL-only call_rcu() in zfs directly. Fortunately, on Linux 5.2 and after, if we define sops->free_inode(), the kernel will do call_rcu() for us. This issue may be triggered more easily with init_on_free=1 boot parameter: BUG: kernel NULL pointer dereference, address: 0000000000000020 RIP: 0010:selinux_inode_permission+0x10e/0x1c0 Call Trace: ? show_trace_log_lvl+0x1be/0x2d9 ? show_trace_log_lvl+0x1be/0x2d9 ? show_trace_log_lvl+0x1be/0x2d9 ? security_inode_permission+0x37/0x60 ? __die_body.cold+0x8/0xd ? no_context+0x113/0x220 ? exc_page_fault+0x6d/0x130 ? asm_exc_page_fault+0x1e/0x30 ? selinux_inode_permission+0x10e/0x1c0 security_inode_permission+0x37/0x60 link_path_walk.part.0.constprop.0+0xb5/0x360 ? path_init+0x27d/0x3c0 path_lookupat+0x3e/0x1a0 filename_lookup+0xc0/0x1d0 ? __check_object_size.part.0+0x123/0x150 ? strncpy_from_user+0x4e/0x130 ? getname_flags.part.0+0x4b/0x1c0 vfs_statx+0x72/0x120 ? ioctl_has_perm.constprop.0.isra.0+0xbd/0x120 __do_sys_newlstat+0x39/0x70 ? __x64_sys_ioctl+0x8d/0xd0 do_syscall_64+0x30/0x40 entry_SYSCALL_64_after_hwframe+0x62/0xc7 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Rob Norris <rob.norris@klarasystems.com> Signed-off-by: Chunwei Chen <david.chen@nutanix.com> Co-authored-by: Chunwei Chen <david.chen@nutanix.com> Closes #17546
This commit is contained in:
parent
347d68048a
commit
c79d5e4f33
24
config/kernel-free-inode.m4
Normal file
24
config/kernel-free-inode.m4
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
dnl #
|
||||||
|
dnl # Linux 5.2 API change
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE], [
|
||||||
|
ZFS_LINUX_TEST_SRC([super_operations_free_inode], [
|
||||||
|
#include <linux/fs.h>
|
||||||
|
|
||||||
|
static void free_inode(struct inode *) { }
|
||||||
|
|
||||||
|
static struct super_operations sops __attribute__ ((unused)) = {
|
||||||
|
.free_inode = free_inode,
|
||||||
|
};
|
||||||
|
],[])
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_SOPS_FREE_INODE], [
|
||||||
|
AC_MSG_CHECKING([whether sops->free_inode() exists])
|
||||||
|
ZFS_LINUX_TEST_RESULT([super_operations_free_inode], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_SOPS_FREE_INODE, 1, [sops->free_inode() exists])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
|
])
|
@ -132,6 +132,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
|||||||
ZFS_AC_KERNEL_SRC_PIN_USER_PAGES
|
ZFS_AC_KERNEL_SRC_PIN_USER_PAGES
|
||||||
ZFS_AC_KERNEL_SRC_TIMER
|
ZFS_AC_KERNEL_SRC_TIMER
|
||||||
ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR
|
ZFS_AC_KERNEL_SRC_SUPER_BLOCK_S_WB_ERR
|
||||||
|
ZFS_AC_KERNEL_SRC_SOPS_FREE_INODE
|
||||||
case "$host_cpu" in
|
case "$host_cpu" in
|
||||||
powerpc*)
|
powerpc*)
|
||||||
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
||||||
@ -248,6 +249,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
|||||||
ZFS_AC_KERNEL_PIN_USER_PAGES
|
ZFS_AC_KERNEL_PIN_USER_PAGES
|
||||||
ZFS_AC_KERNEL_TIMER
|
ZFS_AC_KERNEL_TIMER
|
||||||
ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR
|
ZFS_AC_KERNEL_SUPER_BLOCK_S_WB_ERR
|
||||||
|
ZFS_AC_KERNEL_SOPS_FREE_INODE
|
||||||
case "$host_cpu" in
|
case "$host_cpu" in
|
||||||
powerpc*)
|
powerpc*)
|
||||||
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
||||||
|
@ -157,6 +157,7 @@ struct znode;
|
|||||||
|
|
||||||
extern int zfs_sync(struct super_block *, int, cred_t *);
|
extern int zfs_sync(struct super_block *, int, cred_t *);
|
||||||
extern int zfs_inode_alloc(struct super_block *, struct inode **ip);
|
extern int zfs_inode_alloc(struct super_block *, struct inode **ip);
|
||||||
|
extern void zfs_inode_free(struct inode *);
|
||||||
extern void zfs_inode_destroy(struct inode *);
|
extern void zfs_inode_destroy(struct inode *);
|
||||||
extern void zfs_mark_inode_dirty(struct inode *);
|
extern void zfs_mark_inode_dirty(struct inode *);
|
||||||
extern boolean_t zfs_relatime_need_update(const struct inode *);
|
extern boolean_t zfs_relatime_need_update(const struct inode *);
|
||||||
|
@ -371,6 +371,12 @@ zfs_inode_alloc(struct super_block *sb, struct inode **ip)
|
|||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
zfs_inode_free(struct inode *ip)
|
||||||
|
{
|
||||||
|
kmem_cache_free(znode_cache, ITOZ(ip));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Called in multiple places when an inode should be destroyed.
|
* Called in multiple places when an inode should be destroyed.
|
||||||
*/
|
*/
|
||||||
@ -395,8 +401,15 @@ zfs_inode_destroy(struct inode *ip)
|
|||||||
nvlist_free(zp->z_xattr_cached);
|
nvlist_free(zp->z_xattr_cached);
|
||||||
zp->z_xattr_cached = NULL;
|
zp->z_xattr_cached = NULL;
|
||||||
}
|
}
|
||||||
|
#ifndef HAVE_SOPS_FREE_INODE
|
||||||
kmem_cache_free(znode_cache, zp);
|
/*
|
||||||
|
* inode needs to be freed in RCU callback. If we have
|
||||||
|
* super_operations->free_inode, Linux kernel will do call_rcu
|
||||||
|
* for us. But if we don't have it, since call_rcu is GPL-only
|
||||||
|
* symbol, we can only free synchronously and accept the risk.
|
||||||
|
*/
|
||||||
|
zfs_inode_free(ip);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -45,6 +45,15 @@ zpl_inode_alloc(struct super_block *sb)
|
|||||||
return (ip);
|
return (ip);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_SOPS_FREE_INODE
|
||||||
|
static void
|
||||||
|
zpl_inode_free(struct inode *ip)
|
||||||
|
{
|
||||||
|
ASSERT(atomic_read(&ip->i_count) == 0);
|
||||||
|
zfs_inode_free(ip);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static void
|
static void
|
||||||
zpl_inode_destroy(struct inode *ip)
|
zpl_inode_destroy(struct inode *ip)
|
||||||
{
|
{
|
||||||
@ -455,6 +464,9 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg)
|
|||||||
|
|
||||||
const struct super_operations zpl_super_operations = {
|
const struct super_operations zpl_super_operations = {
|
||||||
.alloc_inode = zpl_inode_alloc,
|
.alloc_inode = zpl_inode_alloc,
|
||||||
|
#ifdef HAVE_SOPS_FREE_INODE
|
||||||
|
.free_inode = zpl_inode_free,
|
||||||
|
#endif
|
||||||
.destroy_inode = zpl_inode_destroy,
|
.destroy_inode = zpl_inode_destroy,
|
||||||
.dirty_inode = zpl_dirty_inode,
|
.dirty_inode = zpl_dirty_inode,
|
||||||
.write_inode = NULL,
|
.write_inode = NULL,
|
||||||
|
Loading…
Reference in New Issue
Block a user