From 218b4e0a7608f7ef37ec72042a68c45e539a5d1c Mon Sep 17 00:00:00 2001
From: Brian Behlendorf
Date: Thu, 18 Jun 2015 09:21:19 -0700
Subject: [PATCH] Add zfs_sb_prune_aliases() function

For kernels which do not implement a per-superblock shrinker,
those older than Linux 3.1, the shrink_dcache_parent() function
was used to attempt to reclaim dentries. This was found not to be
entirely reliable and could lead to performance issues on older
kernels running meta-data heavy workloads.

To address this issue a zfs_sb_prune_aliases() function has been
added to implement this functionality. It relies on traversing
the list of znodes for a filesystem and adding them to a private
list with a reference held. The private list can then be safely
walked outside the z_znodes_lock to prune dentries and drop the
last reference so the inode can be freed.

This provides the same synchronous behavior as the per-filesystem
shrinker and has the advantage of depending on only long standing
interfaces.

Signed-off-by: Brian Behlendorf
Signed-off-by: Tim Chase
Closes #3501
---
 config/kernel-d-prune-aliases.m4 | 19 ++++++++
 config/kernel.m4                 |  1 +
 module/zfs/zfs_vfsops.c          | 75 +++++++++++++++++++++++++++-----
 3 files changed, 84 insertions(+), 11 deletions(-)
 create mode 100644 config/kernel-d-prune-aliases.m4

diff --git a/config/kernel-d-prune-aliases.m4 b/config/kernel-d-prune-aliases.m4
new file mode 100644
index 000000000..d9c521b1d
--- /dev/null
+++ b/config/kernel-d-prune-aliases.m4
@@ -0,0 +1,19 @@
+dnl #
+dnl # 2.6.12 API change
+dnl # d_prune_aliases() helper function available.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_D_PRUNE_ALIASES],
+	[AC_MSG_CHECKING([whether d_prune_aliases() is available])
+	ZFS_LINUX_TRY_COMPILE_SYMBOL([
+		#include <linux/dcache.h>
+	], [
+		struct inode *ip = NULL;
+		d_prune_aliases(ip);
+	], [d_prune_aliases], [fs/dcache.c], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_D_PRUNE_ALIASES, 1,
+		    [d_prune_aliases() is available])
+	], [
+		AC_MSG_RESULT(no)
+	])
+])
diff --git a/config/kernel.m4 b/config/kernel.m4
index fe42e171e..51f8a2bf7 100644
--- a/config/kernel.m4
+++ b/config/kernel.m4
@@ -78,6 +78,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
 	ZFS_AC_KERNEL_INSERT_INODE_LOCKED
 	ZFS_AC_KERNEL_D_MAKE_ROOT
 	ZFS_AC_KERNEL_D_OBTAIN_ALIAS
+	ZFS_AC_KERNEL_D_PRUNE_ALIASES
 	ZFS_AC_KERNEL_D_SET_D_OP
 	ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA
 	ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 88f655a8c..ae1bc324b 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1072,6 +1072,67 @@ zfs_root(zfs_sb_t *zsb, struct inode **ipp)
 }
 EXPORT_SYMBOL(zfs_root);
 
+#if !defined(HAVE_SPLIT_SHRINKER_CALLBACK) && !defined(HAVE_SHRINK) && \
+	defined(HAVE_D_PRUNE_ALIASES)
+/*
+ * Linux kernels older than 3.1 do not support a per-filesystem shrinker.
+ * To accommodate this we must improvise and manually walk the list of znodes
+ * attempting to prune dentries in order to be able to drop the inodes.
+ *
+ * To avoid scanning the same znodes multiple times they are always rotated
+ * to the end of the z_all_znodes list.  New znodes are inserted at the
+ * end of the list so we're always scanning the oldest znodes first.
+ */
+static int
+zfs_sb_prune_aliases(zfs_sb_t *zsb, unsigned long nr_to_scan)
+{
+	znode_t **zp_array, *zp;
+	int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
+	int objects = 0;
+	int i = 0, j = 0;
+
+	zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
+
+	mutex_enter(&zsb->z_znodes_lock);
+	while ((zp = list_head(&zsb->z_all_znodes)) != NULL) {
+		/* Stop once nr_to_scan znodes were visited or the array is full */
+		if ((i++ >= nr_to_scan) || (j >= max_array))
+			break;
+
+		ASSERT(list_link_active(&zp->z_link_node));
+		list_remove(&zsb->z_all_znodes, zp);
+		list_insert_tail(&zsb->z_all_znodes, zp);
+
+		/* Skip active znodes and .zfs entries */
+		if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
+			continue;
+
+		if (igrab(ZTOI(zp)) == NULL)
+			continue;
+
+		zp_array[j] = zp;
+		j++;
+	}
+	mutex_exit(&zsb->z_znodes_lock);
+
+	for (i = 0; i < j; i++) {
+		zp = zp_array[i];
+
+		ASSERT3P(zp, !=, NULL);
+		d_prune_aliases(ZTOI(zp));
+		/* Count inodes left with only the reference taken by igrab() */
+		if (atomic_read(&ZTOI(zp)->i_count) == 1)
+			objects++;
+
+		iput(ZTOI(zp));
+	}
+
+	kmem_free(zp_array, max_array * sizeof (znode_t *));
+
+	return (objects);
+}
+#endif /* HAVE_D_PRUNE_ALIASES */
+
 /*
  * The ARC has requested that the filesystem drop entries from the dentry
  * and inode caches.  This can occur when the ARC needs to free meta data
@@ -1106,18 +1167,10 @@ zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
 	*objects = (*shrinker->scan_objects)(shrinker, &sc);
 #elif defined(HAVE_SHRINK)
 	*objects = (*shrinker->shrink)(shrinker, &sc);
+#elif defined(HAVE_D_PRUNE_ALIASES)
+	*objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
 #else
-	/*
-	 * Linux kernels older than 3.1 do not support a per-filesystem
-	 * shrinker.  Therefore, we must fall back to the only available
-	 * interface which is to discard all unused dentries and inodes.
-	 * This behavior clearly isn't ideal but it's required so the ARC
-	 * may free memory.  The performance impact is mitigated by the
-	 * fact that the frequently accessed dentry and inode buffers will
-	 * still be in the ARC making them relatively cheap to recreate.
-	 */
-	*objects = 0;
-	shrink_dcache_parent(sb->s_root);
+#error "No available dentry and inode cache pruning mechanism."
 #endif
 	ZFS_EXIT(zsb);
 