diff --git a/include/sys/zfs_vfsops.h b/include/sys/zfs_vfsops.h index eeeffbe4c..4b88260de 100644 --- a/include/sys/zfs_vfsops.h +++ b/include/sys/zfs_vfsops.h @@ -73,6 +73,7 @@ typedef struct zfs_sb { uint64_t z_nr_znodes; /* number of znodes in the fs */ unsigned long z_rollback_time; /* last online rollback time */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */ + arc_prune_t *z_arc_prune; /* called by ARC to prune caches */ struct inode *z_ctldir; /* .zfs directory inode */ avl_tree_t z_ctldir_snaps; /* .zfs/snapshot entries */ kmutex_t z_ctldir_lock; /* .zfs ctldir lock */ diff --git a/include/sys/zpl.h b/include/sys/zpl.h index 3fc5d979f..c7701aae5 100644 --- a/include/sys/zpl.h +++ b/include/sys/zpl.h @@ -63,7 +63,7 @@ extern const struct file_operations zpl_file_operations; extern const struct file_operations zpl_dir_file_operations; /* zpl_super.c */ -extern void zpl_prune_sbs(int64_t bytes_to_scan, void *private); +extern void zpl_prune_sb(int64_t nr_to_scan, void *arg); typedef struct zpl_mount_data { const char *z_osname; /* Dataset name */ diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index 321b6285c..4b3dc3666 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -386,7 +386,11 @@ Use \fB1\fR for yes (default) and \fB0\fR to disable. \fBzfs_arc_meta_limit\fR (ulong) .ad .RS 12n -Meta limit for arc size +The maximum allowed size in bytes that meta data buffers are allowed to +consume in the ARC. When this limit is reached meta data buffers will +be reclaimed even if the overall arc_c_max has not been reached. This +value defaults to 0 which indicates that 3/4 of the ARC may be used +for meta data. .sp Default value: \fB0\fR. .RE @@ -397,9 +401,14 @@ Default value: \fB0\fR. \fBzfs_arc_meta_prune\fR (int) .ad .RS 12n -Bytes of meta data to prune +The number of dentries and inodes to be scanned looking for entries +which can be dropped. 
This may be required when the ARC reaches the +\fBzfs_arc_meta_limit\fR because dentries and inodes can pin buffers +in the ARC. Increasing this value will cause the dentry and inode caches +to be pruned more aggressively. Setting this value to 0 will disable +pruning the inode and dentry caches. .sp -Default value: \fB1,048,576\fR. +Default value: \fB10,000\fR. .RE .sp diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 188086767..f9f0008c0 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -158,8 +158,8 @@ static kmutex_t arc_reclaim_thr_lock; static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */ static uint8_t arc_thread_exit; -/* number of bytes to prune from caches when at arc_meta_limit is reached */ -int zfs_arc_meta_prune = 1048576; +/* number of objects to prune from caches when arc_meta_limit is reached */ +int zfs_arc_meta_prune = 10000; typedef enum arc_reclaim_strategy { ARC_RECLAIM_AGGR, /* Aggressive reclaim strategy */ @@ -5607,7 +5607,7 @@ module_param(zfs_arc_meta_limit, ulong, 0644); MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size"); module_param(zfs_arc_meta_prune, int, 0644); -MODULE_PARM_DESC(zfs_arc_meta_prune, "Bytes of meta data to prune"); +MODULE_PARM_DESC(zfs_arc_meta_prune, "Meta objects to scan for prune"); module_param(zfs_arc_grow_retry, int, 0644); MODULE_PARM_DESC(zfs_arc_grow_retry, "Seconds before growing arc size"); diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 4df324a68..e98f4bf6a 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1068,29 +1068,52 @@ zfs_root(zfs_sb_t *zsb, struct inode **ipp) } EXPORT_SYMBOL(zfs_root); -#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) +/* + * The ARC has requested that the filesystem drop entries from the dentry + * and inode caches. This can occur when the ARC needs to free meta data + * blocks but can't because they are all pinned by entries in these caches. 
+ */ int zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) { zfs_sb_t *zsb = sb->s_fs_info; + int error = 0; +#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) struct shrinker *shrinker = &sb->s_shrink; struct shrink_control sc = { .nr_to_scan = nr_to_scan, .gfp_mask = GFP_KERNEL, }; +#endif ZFS_ENTER(zsb); -#ifdef HAVE_SPLIT_SHRINKER_CALLBACK + +#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) *objects = (*shrinker->scan_objects)(shrinker, &sc); -#else +#elif defined(HAVE_SHRINK) *objects = (*shrinker->shrink)(shrinker, &sc); +#else + /* + * Linux kernels older than 3.1 do not support a per-filesystem + * shrinker. Therefore, we must fall back to the only available + * interface which is to discard all unused dentries and inodes. + * This behavior clearly isn't ideal but it's required so the ARC + * may free memory. The performance impact is mitigated by the + * fact that the frequently accessed dentry and inode buffers will + * still be in the ARC making them relatively cheap to recreate. + */ + *objects = 0; + shrink_dcache_parent(sb->s_root); #endif ZFS_EXIT(zsb); - return (0); + dprintf_ds(zsb->z_os->os_dsl_dataset, + "pruning, nr_to_scan=%lu objects=%d error=%d\n", + nr_to_scan, *objects, error); + + return (error); } EXPORT_SYMBOL(zfs_sb_prune); -#endif /* defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) */ /* * Teardown the zfs_sb_t. 
@@ -1286,6 +1309,8 @@ zfs_domount(struct super_block *sb, void *data, int silent) if (!zsb->z_issnap) zfsctl_create(zsb); + + zsb->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb); out: if (error) { dmu_objset_disown(zsb->z_os, zsb); @@ -1324,6 +1349,7 @@ zfs_umount(struct super_block *sb) zfs_sb_t *zsb = sb->s_fs_info; objset_t *os; + arc_remove_prune_callback(zsb->z_arc_prune); VERIFY(zfs_sb_teardown(zsb, B_TRUE) == 0); os = zsb->z_os; bdi_destroy(sb->s_bdi); @@ -1682,7 +1708,6 @@ zfs_init(void) zfs_znode_init(); dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); register_filesystem(&zpl_fs_type); - (void) arc_add_prune_callback(zpl_prune_sbs, NULL); } void diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c index 47cc2fcf4..ef0f9d311 100644 --- a/module/zfs/zpl_super.c +++ b/module/zfs/zpl_super.c @@ -109,6 +109,12 @@ zpl_evict_inode(struct inode *ip) #else +static void +zpl_drop_inode(struct inode *ip) +{ + generic_delete_inode(ip); +} + static void zpl_clear_inode(struct inode *ip) { @@ -125,7 +131,6 @@ zpl_inode_delete(struct inode *ip) truncate_setsize(ip, 0); clear_inode(ip); } - #endif /* HAVE_EVICT_INODE */ static void @@ -276,37 +281,13 @@ zpl_kill_sb(struct super_block *sb) #endif /* HAVE_S_INSTANCES_LIST_HEAD */ } -#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) -/* - * Linux 3.1 - 3.x API - * - * The Linux 3.1 API introduced per-sb cache shrinkers to replace the - * global ones. This allows us a mechanism to cleanly target a specific - * zfs file system when the dnode and inode caches grow too large. - * - * In addition, the 3.0 kernel added the iterate_supers_type() helper - * function which is used to safely walk all of the zfs file systems. 
- */ -static void -zpl_prune_sb(struct super_block *sb, void *arg) -{ - int objects = 0; - int error; - - error = -zfs_sb_prune(sb, *(unsigned long *)arg, &objects); - ASSERT3S(error, <=, 0); -} -#endif /* defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) */ - void -zpl_prune_sbs(int64_t bytes_to_scan, void *private) +zpl_prune_sb(int64_t nr_to_scan, void *arg) { -#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) - unsigned long nr_to_scan = (bytes_to_scan / sizeof (znode_t)); + struct super_block *sb = (struct super_block *)arg; + int objects = 0; - iterate_supers_type(&zpl_fs_type, zpl_prune_sb, &nr_to_scan); - kmem_reap(); -#endif /* defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK) */ + (void) -zfs_sb_prune(sb, nr_to_scan, &objects); } #ifdef HAVE_NR_CACHED_OBJECTS @@ -343,10 +324,10 @@ const struct super_operations zpl_super_operations = { .destroy_inode = zpl_inode_destroy, .dirty_inode = zpl_dirty_inode, .write_inode = NULL, - .drop_inode = NULL, #ifdef HAVE_EVICT_INODE .evict_inode = zpl_evict_inode, #else + .drop_inode = zpl_drop_inode, .clear_inode = zpl_clear_inode, .delete_inode = zpl_inode_delete, #endif /* HAVE_EVICT_INODE */