From 799e09f75a31e80a1702a850838c79879af8b917 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 30 Oct 2023 19:56:04 -0400 Subject: [PATCH] Unify arc_prune_async() code There is no reason to keep separate implementations for FreeBSD and Linux. Make the Linux code shared, since it is more functional, and just register a FreeBSD-specific prune callback with the arc_add_prune_callback() API. Aside from code cleanup, this should fix excessive pruning on FreeBSD: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=274698 Reviewed-by: Brian Behlendorf Reviewed-by: Mark Johnston Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15456 --- include/os/linux/zfs/sys/zpl.h | 2 +- include/sys/arc.h | 2 +- include/sys/arc_impl.h | 1 - module/os/freebsd/zfs/arc_os.c | 62 ------------------------------ module/os/freebsd/zfs/zfs_vfsops.c | 32 +++++++++++++++ module/os/linux/zfs/arc_os.c | 51 ------------------------ module/os/linux/zfs/zpl_super.c | 2 +- module/zfs/arc.c | 52 +++++++++++++++++++++++++ 8 files changed, 87 insertions(+), 117 deletions(-) diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h index f4f1dcf95..9b729be6d 100644 --- a/include/os/linux/zfs/sys/zpl.h +++ b/include/os/linux/zfs/sys/zpl.h @@ -60,7 +60,7 @@ extern const struct file_operations zpl_file_operations; extern const struct file_operations zpl_dir_file_operations; /* zpl_super.c */ -extern void zpl_prune_sb(int64_t nr_to_scan, void *arg); +extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg); extern const struct super_operations zpl_super_operations; extern const struct export_operations zpl_export_operations; diff --git a/include/sys/arc.h b/include/sys/arc.h index 9d67dab06..05307aab9 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -81,7 +81,7 @@ typedef struct arc_prune arc_prune_t; typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp, arc_buf_t *buf, void *priv); typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t 
*buf, void *priv); -typedef void arc_prune_func_t(int64_t bytes, void *priv); +typedef void arc_prune_func_t(uint64_t bytes, void *priv); /* Shared module parameters */ extern uint_t zfs_arc_average_blocksize; diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index adff42c55..defebe3b2 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -1065,7 +1065,6 @@ extern void arc_wait_for_eviction(uint64_t, boolean_t); extern void arc_lowmem_init(void); extern void arc_lowmem_fini(void); -extern void arc_prune_async(uint64_t); extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg); extern uint64_t arc_free_memory(void); extern int64_t arc_available_memory(void); diff --git a/module/os/freebsd/zfs/arc_os.c b/module/os/freebsd/zfs/arc_os.c index 12f16edb1..92696c0bf 100644 --- a/module/os/freebsd/zfs/arc_os.c +++ b/module/os/freebsd/zfs/arc_os.c @@ -52,11 +52,6 @@ #include #include -#if __FreeBSD_version >= 1300139 -static struct sx arc_vnlru_lock; -static struct vnode *arc_vnlru_marker; -#endif - extern struct vfsops zfs_vfsops; uint_t zfs_arc_free_target = 0; @@ -131,53 +126,6 @@ arc_default_max(uint64_t min, uint64_t allmem) return (MAX(allmem * 5 / 8, size)); } -/* - * Helper function for arc_prune_async() it is responsible for safely - * handling the execution of a registered arc_prune_func_t. - */ -static void -arc_prune_task(void *arg) -{ - uint64_t nr_scan = (uintptr_t)arg; - -#ifndef __ILP32__ - if (nr_scan > INT_MAX) - nr_scan = INT_MAX; -#endif - -#if __FreeBSD_version >= 1300139 - sx_xlock(&arc_vnlru_lock); - vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker); - sx_xunlock(&arc_vnlru_lock); -#else - vnlru_free(nr_scan, &zfs_vfsops); -#endif -} - -/* - * Notify registered consumers they must drop holds on a portion of the ARC - * buffered they reference. This provides a mechanism to ensure the ARC can - * honor the metadata limit and reclaim otherwise pinned ARC buffers. 
This - * is analogous to dnlc_reduce_cache() but more generic. - * - * This operation is performed asynchronously so it may be safely called - * in the context of the arc_reclaim_thread(). A reference is taken here - * for each registered arc_prune_t and the arc_prune_task() is responsible - * for releasing it once the registered arc_prune_func_t has completed. - */ -void -arc_prune_async(uint64_t adjust) -{ - -#ifndef __LP64__ - if (adjust > UINTPTR_MAX) - adjust = UINTPTR_MAX; -#endif - taskq_dispatch(arc_prune_taskq, arc_prune_task, - (void *)(intptr_t)adjust, TQ_SLEEP); - ARCSTAT_BUMP(arcstat_prune); -} - uint64_t arc_all_memory(void) { @@ -228,10 +176,6 @@ arc_lowmem_init(void) { arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL, EVENTHANDLER_PRI_FIRST); -#if __FreeBSD_version >= 1300139 - arc_vnlru_marker = vnlru_alloc_marker(); - sx_init(&arc_vnlru_lock, "arc vnlru lock"); -#endif } void @@ -239,12 +183,6 @@ arc_lowmem_fini(void) { if (arc_event_lowmem != NULL) EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem); -#if __FreeBSD_version >= 1300139 - if (arc_vnlru_marker != NULL) { - vnlru_free_marker(arc_vnlru_marker); - sx_destroy(&arc_vnlru_lock); - } -#endif } void diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c index e8b9ada13..a972c720d 100644 --- a/module/os/freebsd/zfs/zfs_vfsops.c +++ b/module/os/freebsd/zfs/zfs_vfsops.c @@ -2070,6 +2070,26 @@ zfs_vnodes_adjust_back(void) #endif } +#if __FreeBSD_version >= 1300139 +static struct sx zfs_vnlru_lock; +static struct vnode *zfs_vnlru_marker; +#endif +static arc_prune_t *zfs_prune; + +static void +zfs_prune_task(uint64_t nr_to_scan, void *arg __unused) +{ + if (nr_to_scan > INT_MAX) + nr_to_scan = INT_MAX; +#if __FreeBSD_version >= 1300139 + sx_xlock(&zfs_vnlru_lock); + vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker); + sx_xunlock(&zfs_vnlru_lock); +#else + vnlru_free(nr_to_scan, &zfs_vfsops); +#endif +} + void zfs_init(void) { @@ 
-2096,11 +2116,23 @@ zfs_init(void) dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); + +#if __FreeBSD_version >= 1300139 + zfs_vnlru_marker = vnlru_alloc_marker(); + sx_init(&zfs_vnlru_lock, "zfs vnlru lock"); +#endif + zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL); } void zfs_fini(void) { + arc_remove_prune_callback(zfs_prune); +#if __FreeBSD_version >= 1300139 + vnlru_free_marker(zfs_vnlru_marker); + sx_destroy(&zfs_vnlru_lock); +#endif + taskq_destroy(zfsvfs_taskq); zfsctl_fini(); zfs_znode_fini(); diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c index 381563476..55cdbba5b 100644 --- a/module/os/linux/zfs/arc_os.c +++ b/module/os/linux/zfs/arc_os.c @@ -495,56 +495,5 @@ arc_unregister_hotplug(void) } #endif /* _KERNEL */ -/* - * Helper function for arc_prune_async() it is responsible for safely - * handling the execution of a registered arc_prune_func_t. - */ -static void -arc_prune_task(void *ptr) -{ - arc_prune_t *ap = (arc_prune_t *)ptr; - arc_prune_func_t *func = ap->p_pfunc; - - if (func != NULL) - func(ap->p_adjust, ap->p_private); - - zfs_refcount_remove(&ap->p_refcnt, func); -} - -/* - * Notify registered consumers they must drop holds on a portion of the ARC - * buffered they reference. This provides a mechanism to ensure the ARC can - * honor the metadata limit and reclaim otherwise pinned ARC buffers. This - * is analogous to dnlc_reduce_cache() but more generic. - * - * This operation is performed asynchronously so it may be safely called - * in the context of the arc_reclaim_thread(). A reference is taken here - * for each registered arc_prune_t and the arc_prune_task() is responsible - * for releasing it once the registered arc_prune_func_t has completed. 
- */ -void -arc_prune_async(uint64_t adjust) -{ - arc_prune_t *ap; - - mutex_enter(&arc_prune_mtx); - for (ap = list_head(&arc_prune_list); ap != NULL; - ap = list_next(&arc_prune_list, ap)) { - - if (zfs_refcount_count(&ap->p_refcnt) >= 2) - continue; - - zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc); - ap->p_adjust = adjust; - if (taskq_dispatch(arc_prune_taskq, arc_prune_task, - ap, TQ_SLEEP) == TASKQID_INVALID) { - zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc); - continue; - } - ARCSTAT_BUMP(arcstat_prune); - } - mutex_exit(&arc_prune_mtx); -} - ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW, "Limit on number of pages that ARC shrinker can reclaim at once"); diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index ad52a11aa..d98d32c1f 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -375,7 +375,7 @@ zpl_kill_sb(struct super_block *sb) } void -zpl_prune_sb(int64_t nr_to_scan, void *arg) +zpl_prune_sb(uint64_t nr_to_scan, void *arg) { struct super_block *sb = (struct super_block *)arg; int objects = 0; diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 06544925b..dfea15b74 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -886,6 +886,8 @@ static void l2arc_do_free_on_write(void); static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr, boolean_t state_only); +static void arc_prune_async(uint64_t adjust); + #define l2arc_hdr_arcstats_increment(hdr) \ l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE) #define l2arc_hdr_arcstats_decrement(hdr) \ @@ -6050,6 +6052,56 @@ arc_remove_prune_callback(arc_prune_t *p) kmem_free(p, sizeof (*p)); } +/* + * Helper function for arc_prune_async() it is responsible for safely + * handling the execution of a registered arc_prune_func_t. 
+ */ +static void +arc_prune_task(void *ptr) +{ + arc_prune_t *ap = (arc_prune_t *)ptr; + arc_prune_func_t *func = ap->p_pfunc; + + if (func != NULL) + func(ap->p_adjust, ap->p_private); + + zfs_refcount_remove(&ap->p_refcnt, func); +} + +/* + * Notify registered consumers they must drop holds on a portion of the ARC + * buffers they reference. This provides a mechanism to ensure the ARC can + * honor the metadata limit and reclaim otherwise pinned ARC buffers. + * + * This operation is performed asynchronously so it may be safely called + * in the context of the arc_reclaim_thread(). A reference is taken here + * for each registered arc_prune_t and the arc_prune_task() is responsible + * for releasing it once the registered arc_prune_func_t has completed. + */ +static void +arc_prune_async(uint64_t adjust) +{ + arc_prune_t *ap; + + mutex_enter(&arc_prune_mtx); + for (ap = list_head(&arc_prune_list); ap != NULL; + ap = list_next(&arc_prune_list, ap)) { + + if (zfs_refcount_count(&ap->p_refcnt) >= 2) + continue; + + zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc); + ap->p_adjust = adjust; + if (taskq_dispatch(arc_prune_taskq, arc_prune_task, + ap, TQ_SLEEP) == TASKQID_INVALID) { + zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc); + continue; + } + ARCSTAT_BUMP(arcstat_prune); + } + mutex_exit(&arc_prune_mtx); +} + /* * Notify the arc that a block was freed, and thus will never be used again. */