From e35704647e84c62c6a6017ead0b66b446010e4ff Mon Sep 17 00:00:00 2001 From: loli10K Date: Sun, 27 Oct 2019 00:22:19 +0200 Subject: [PATCH] Fix for ARC sysctls ignored at runtime This change leverage module_param_call() to run arc_tuning_update() immediately after the ARC tunable has been updated as suggested in cffa8372 code review. A simple test case is added to the ZFS Test Suite to prevent future regressions in functionality. Reviewed-by: Matt Macy Reviewed-by: Brian Behlendorf Signed-off-by: loli10K Closes #9487 Closes #9489 --- include/os/linux/kernel/linux/mod_compat.h | 46 +++++++++++--- include/sys/arc_impl.h | 4 ++ include/sys/zfs_context.h | 6 ++ module/os/linux/zfs/arc_os.c | 28 +++++++++ module/zfs/arc.c | 63 ++++++++++--------- module/zfs/spa_misc.c | 6 +- tests/runfiles/linux.run | 3 +- .../tests/functional/arc/Makefile.am | 1 + .../arc/arcstats_runtime_tuning.ksh | 46 ++++++++++++++ tests/zfs-tests/tests/perf/perf.shlib | 17 +++++ 10 files changed, 178 insertions(+), 42 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh diff --git a/include/os/linux/kernel/linux/mod_compat.h b/include/os/linux/kernel/linux/mod_compat.h index 1f19f4b40..5579cb5bf 100644 --- a/include/os/linux/kernel/linux/mod_compat.h +++ b/include/os/linux/kernel/linux/mod_compat.h @@ -76,26 +76,26 @@ enum scope_prefix_types { /* * Declare a module parameter / sysctl node * - * scope_prefix the part of the the sysctl / sysfs tree the node resides under + * "scope_prefix" the part of the the sysctl / sysfs tree the node resides under * (currently a no-op on Linux) - * name_prefix the part of the variable name that will be excluded from the + * "name_prefix" the part of the variable name that will be excluded from the * exported names on platforms with a hierarchical namespace - * name the part of the variable that will be exposed on platforms with a + * "name" the part of the variable that will be exposed on platforms with a * hierarchical namespace, or as name_prefix ## name on Linux - * type the variable type - * perm the permissions (read/write or read only) - * desc a brief description of the option + * "type" the variable type + * "perm" the permissions (read/write or read only) + * "desc" a brief description of the option * * Examples: * ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, rotating_inc, UINT, - * ZMOD_RW, "Rotating media load increment for non-seeking I/O's"); + * ZMOD_RW, "Rotating media load increment for non-seeking I/O's"); * on FreeBSD: * vfs.zfs.vdev.mirror.rotating_inc * on Linux: * zfs_vdev_mirror_rotating_inc * - * *ZFS_MODULE_PARAM(zfs, , dmu_prefetch_max, UINT, ZMOD_RW, - * "Limit one prefetch call to this size"); + * ZFS_MODULE_PARAM(zfs, , dmu_prefetch_max, UINT, ZMOD_RW, + * "Limit one prefetch call to this size"); * on FreeBSD: * vfs.zfs.dmu_prefetch_max * on Linux: @@ -108,5 +108,33 @@ enum scope_prefix_types { MODULE_PARM_DESC(name_prefix ## name, desc) /* END CSTYLED */ +/* + * Declare a module parameter / sysctl node + * + * "scope_prefix" the part of the the sysctl / sysfs tree the node resides under + * (currently a no-op on Linux) + * "name_prefix" the part of the variable name that will be excluded from the + * exported names on platforms with a hierarchical namespace + * "name" the part of the variable that will be exposed on platforms with a + * hierarchical namespace, or as name_prefix ## name on Linux + * "setfunc" setter function + * "getfunc" getter function + * "perm" the permissions (read/write or read only) + * "desc" a brief description of the option + * + * Examples: + * ZFS_MODULE_PARAM_CALL(zfs_spa, spa_, slop_shift, param_set_slop_shift, + * param_get_int, ZMOD_RW, "Reserved free space in pool"); + * on FreeBSD: + * vfs.zfs.spa_slop_shift + * on Linux: + * spa_slop_shift + */ +/* BEGIN CSTYLED */ +#define ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, setfunc, getfunc, perm, desc) \ + CTASSERT_GLOBAL((sizeof (scope_prefix) == sizeof (enum scope_prefix_types))); \ + module_param_call(name_prefix ## name, setfunc, getfunc, &name_prefix ## name, perm); \ + MODULE_PARM_DESC(name_prefix ## name, desc) +/* END CSTYLED */ #endif /* _MOD_COMPAT_H */ diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h index 28d430f04..8eeee54c9 100644 --- a/include/sys/arc_impl.h +++ b/include/sys/arc_impl.h @@ -611,6 +611,10 @@ extern void arc_prune_async(int64_t); extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg); extern uint64_t arc_free_memory(void); extern int64_t arc_available_memory(void); +extern void arc_tuning_update(void); + +extern int param_set_arc_long(const char *buf, zfs_kernel_param_t *kp); +extern int param_set_arc_int(const char *buf, zfs_kernel_param_t *kp); #ifdef __cplusplus } diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index a493c30dd..18d9b25a3 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -198,7 +198,13 @@ extern int aok; /* * Tunables. */ +typedef struct zfs_kernel_param { + const char *name; /* unused stub */ +} zfs_kernel_param_t; + #define ZFS_MODULE_PARAM(scope_prefix, name_prefix, name, type, perm, desc) +#define ZFS_MODULE_PARAM_CALL(scope_prefix, name_prefix, name, setfunc, \ + getfunc, perm, desc) /* * Exported symbols diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c index 696f671ab..c9db31096 100644 --- a/module/os/linux/zfs/arc_os.c +++ b/module/os/linux/zfs/arc_os.c @@ -357,6 +357,34 @@ arc_lowmem_fini(void) { spl_unregister_shrinker(&arc_shrinker); } + +int +param_set_arc_long(const char *buf, zfs_kernel_param_t *kp) +{ + int error; + + error = param_set_long(buf, kp); + if (error < 0) + return (SET_ERROR(error)); + + arc_tuning_update(); + + return (0); +} + +int +param_set_arc_int(const char *buf, zfs_kernel_param_t *kp) +{ + int error; + + error = param_set_int(buf, kp); + if (error < 0) + return (SET_ERROR(error)); + + arc_tuning_update(); + + return (0); +} #else /* _KERNEL */ int64_t arc_available_memory(void) diff --git a/module/zfs/arc.c b/module/zfs/arc.c index b1a9681dd..45211bc5c 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -24,6 +24,7 @@ * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2019, loli10K . All rights reserved. */ /* @@ -814,7 +815,6 @@ static void arc_hdr_alloc_abd(arc_buf_hdr_t *, boolean_t); static void arc_access(arc_buf_hdr_t *, kmutex_t *); static boolean_t arc_is_overflowing(void); static void arc_buf_watch(arc_buf_t *); -static void arc_tuning_update(void); static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *); static uint32_t arc_bufc_to_flags(arc_buf_contents_t); @@ -6873,10 +6873,12 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj) /* * Called during module initialization and periodically thereafter to - * apply reasonable changes to the exposed performance tunings. Non-zero - * zfs_* values which differ from the currently set values will be applied. + * apply reasonable changes to the exposed performance tunings. Can also be + * called explicitly by param_set_arc_*() functions when ARC tunables are + * updated manually. Non-zero zfs_* values which differ from the currently set + * values will be applied. */ -static void +void arc_tuning_update(void) { uint64_t allmem = arc_all_memory(); @@ -8694,20 +8696,21 @@ EXPORT_SYMBOL(arc_add_prune_callback); EXPORT_SYMBOL(arc_remove_prune_callback); /* BEGIN CSTYLED */ -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, min, ULONG, ZMOD_RW, - "Min arc size"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min, param_set_arc_long, + param_get_long, ZMOD_RW, "Min arc size"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, max, ULONG, ZMOD_RW, - "Max arc size"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, max, param_set_arc_long, + param_get_long, ZMOD_RW, "Max arc size"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_limit, ULONG, ZMOD_RW, - "Metadata limit for arc size"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit, param_set_arc_long, + param_get_long, ZMOD_RW, "Metadata limit for arc size"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_limit_percent, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_limit_percent, + param_set_arc_long, param_get_long, ZMOD_RW, "Percent of arc size for arc meta limit"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_min, ULONG, ZMOD_RW, - "Min arc metadata"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, meta_min, param_set_arc_long, + param_get_long, ZMOD_RW, "Min arc metadata"); ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_prune, INT, ZMOD_RW, "Meta objects to scan for prune"); @@ -8718,20 +8721,20 @@ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_adjust_restarts, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, meta_strategy, INT, ZMOD_RW, "Meta reclaim strategy"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, grow_retry, INT, ZMOD_RW, - "Seconds before growing arc size"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, grow_retry, param_set_arc_int, + param_get_int, ZMOD_RW, "Seconds before growing arc size"); ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, p_dampener_disable, INT, ZMOD_RW, "Disable arc_p adapt dampener"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrink_shift, INT, ZMOD_RW, - "log2(fraction of arc to reclaim)"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, shrink_shift, param_set_arc_int, + param_get_int, ZMOD_RW, "log2(fraction of arc to reclaim)"); ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, pc_percent, UINT, ZMOD_RW, "Percent of pagecache to reclaim arc to"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, p_min_shift, INT, ZMOD_RW, - "arc_c shift to calc min/max arc_p"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, p_min_shift, param_set_arc_int, + param_get_int, ZMOD_RW, "arc_c shift to calc min/max arc_p"); ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, average_blocksize, INT, ZMOD_RD, "Target average block size"); @@ -8739,10 +8742,11 @@ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, average_blocksize, INT, ZMOD_RD, ZFS_MODULE_PARAM(zfs, zfs_, compressed_arc_enabled, INT, ZMOD_RW, "Disable compressed arc buffers"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, min_prefetch_ms, INT, ZMOD_RW, - "Min life of prefetch block in ms"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prefetch_ms, param_set_arc_int, + param_get_int, ZMOD_RW, "Min life of prefetch block in ms"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, min_prescient_prefetch_ms, INT, ZMOD_RW, +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, min_prescient_prefetch_ms, + param_set_arc_int, param_get_int, ZMOD_RW, "Min life of prescient prefetched block in ms"); ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, write_max, ULONG, ZMOD_RW, @@ -8772,16 +8776,17 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, feed_again, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, norw, INT, ZMOD_RW, "No reads during writes"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, lotsfree_percent, INT, ZMOD_RW, - "System free memory I/O throttle in bytes"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int, + param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, sys_free, ULONG, ZMOD_RW, - "System free memory target size in bytes"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, sys_free, param_set_arc_long, + param_get_long, ZMOD_RW, "System free memory target size in bytes"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_limit, ULONG, ZMOD_RW, - "Minimum bytes of dnodes in arc"); +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit, param_set_arc_long, + param_get_long, ZMOD_RW, "Minimum bytes of dnodes in arc"); -ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_limit_percent, ULONG, ZMOD_RW, +ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, dnode_limit_percent, + param_set_arc_long, param_get_long, ZMOD_RW, "Percent of ARC meta buffers for dnodes"); ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, dnode_reduce_percent, ULONG, ZMOD_RW, diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 78bf110b4..46279517d 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -26,6 +26,7 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2019, loli10K . All rights reserved. */ #include @@ -2920,9 +2921,8 @@ module_param_call(zfs_deadman_ziotime_ms, param_set_deadman_ziotime, MODULE_PARM_DESC(zfs_deadman_ziotime_ms, "IO expiration time in milliseconds"); -module_param_call(spa_slop_shift, param_set_slop_shift, param_get_int, - &spa_slop_shift, 0644); -MODULE_PARM_DESC(spa_slop_shift, "Reserved free space in pool"); +ZFS_MODULE_PARAM_CALL(zfs_spa, spa_, slop_shift, param_set_slop_shift, + param_get_int, ZMOD_RW, "Reserved free space in pool"); module_param_call(zfs_deadman_failmode, param_set_deadman_failmode, param_get_charp, &zfs_deadman_failmode, 0644); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 777588ac4..a134c8db8 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -25,7 +25,8 @@ tests = ['posix_001_pos', 'posix_002_pos', 'posix_003_pos'] tags = ['functional', 'acl', 'posix'] [tests/functional/arc:Linux] -tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos'] +tests = ['dbufstats_001_pos', 'dbufstats_002_pos', 'dbufstats_003_pos', + 'arcstats_runtime_tuning'] tags = ['functional', 'arc'] [tests/functional/atime:Linux] diff --git a/tests/zfs-tests/tests/functional/arc/Makefile.am b/tests/zfs-tests/tests/functional/arc/Makefile.am index 22704fa51..809d0346f 100644 --- a/tests/zfs-tests/tests/functional/arc/Makefile.am +++ b/tests/zfs-tests/tests/functional/arc/Makefile.am @@ -2,6 +2,7 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/arc dist_pkgdata_SCRIPTS = \ cleanup.ksh \ setup.ksh \ + arcstats_runtime_tuning.ksh \ dbufstats_001_pos.ksh \ dbufstats_002_pos.ksh \ dbufstats_003_pos.ksh diff --git a/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh b/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh new file mode 100755 index 000000000..6d007aecf --- /dev/null +++ b/tests/zfs-tests/tests/functional/arc/arcstats_runtime_tuning.ksh @@ -0,0 +1,46 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, loli10K . All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/perf/perf.shlib + +function cleanup +{ + # Set tunables to their recorded actual size and then to their original + # value: this works for previously unconfigured tunables. + log_must set_tunable64 zfs_arc_min "$MINSIZE" + log_must set_tunable64 zfs_arc_min "$ZFS_ARC_MIN" + log_must set_tunable64 zfs_arc_max "$MAXSIZE" + log_must set_tunable64 zfs_arc_max "$ZFS_ARC_MAX" +} + +log_onexit cleanup + +ZFS_ARC_MAX="$(get_tunable zfs_arc_max)" +ZFS_ARC_MIN="$(get_tunable zfs_arc_min)" +MINSIZE="$(get_min_arc_size)" +MAXSIZE="$(get_max_arc_size)" + +log_assert "ARC tunables should be updated dynamically" + +for size in $((MAXSIZE/4)) $((MAXSIZE/3)) $((MAXSIZE/2)) $MAXSIZE; do + log_must set_tunable64 zfs_arc_max "$size" + log_must test "$(get_max_arc_size)" == "$size" + log_must set_tunable64 zfs_arc_min "$size" + log_must test "$(get_min_arc_size)" == "$size" +done + +log_pass "ARC tunables can be updated dynamically" diff --git a/tests/zfs-tests/tests/perf/perf.shlib b/tests/zfs-tests/tests/perf/perf.shlib index 69e61e9fd..e2e84ca02 100644 --- a/tests/zfs-tests/tests/perf/perf.shlib +++ b/tests/zfs-tests/tests/perf/perf.shlib @@ -373,6 +373,23 @@ function get_directory echo $directory } +function get_min_arc_size +{ + if is_linux; then + typeset -l min_arc_size=`awk '$1 == "c_min" { print $3 }' \ + /proc/spl/kstat/zfs/arcstats` + else + typeset -l min_arc_size=$(dtrace -qn 'BEGIN { + printf("%u\n", `arc_stats.arcstat_c_min.value.ui64); + exit(0); + }') + fi + + [[ $? -eq 0 ]] || log_fail "get_min_arc_size failed" + + echo $min_arc_size +} + function get_max_arc_size { if is_linux; then