mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Implement memory and CPU hotplug
ZFS currently doesn't react in any way to CPUs or memory being hotplugged into the system. This patch changes that by adding logic to the ARC that lets it take advantage of newly added memory for caching. It also adds logic to the taskq infrastructure to support dynamically expanding the number of threads allocated to a taskq.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Co-authored-by: Matthew Ahrens <matthew.ahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #11212
This commit is contained in:
@@ -70,6 +70,11 @@
|
||||
* zeroing out the borrowed value (forcing that thread to borrow on its next
|
||||
* request, which will also be expensive). This is what makes aggsums well
|
||||
* suited for write-many read-rarely operations.
|
||||
*
|
||||
* Note that the aggsums do not expand if more CPUs are hot-added. In that
|
||||
* case, we will have less fanout than boot_ncpus, but we don't want to always
|
||||
* reserve the RAM necessary to create the extra slots for additional CPUs up
|
||||
* front, and dynamically adding them is a complex task.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
||||
+17
-7
@@ -7592,6 +7592,15 @@ arc_target_bytes(void)
|
||||
return (arc_c);
|
||||
}
|
||||
|
||||
/*
 * Derive the ARC's lower and upper size limits from the amount of memory
 * passed in as allmem: arc_c_min becomes the larger of allmem / 32 and
 * 2ULL << SPA_MAXBLOCKSHIFT, and arc_c_max is whatever arc_default_max()
 * returns for that minimum and allmem. arc_init() calls this with its
 * allmem value in place of the inline assignments it used to carry.
 * NOTE(review): presumably split out so the memory-hotplug path can re-run
 * it when memory is added; that caller is not visible here -- confirm.
 */
void
|
||||
arc_set_limits(uint64_t allmem)
|
||||
{
|
||||
/* Set min cache to 1/32 of all memory, or 32MB, whichever is more. */
|
||||
arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
|
||||
|
||||
/* How to set default max varies by platform. */
|
||||
arc_c_max = arc_default_max(arc_c_min, allmem);
|
||||
}
|
||||
void
|
||||
arc_init(void)
|
||||
{
|
||||
@@ -7607,11 +7616,7 @@ arc_init(void)
|
||||
arc_lowmem_init();
|
||||
#endif
|
||||
|
||||
/* Set min cache to 1/32 of all memory, or 32MB, whichever is more. */
|
||||
arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
|
||||
|
||||
/* How to set default max varies by platform. */
|
||||
arc_c_max = arc_default_max(arc_c_min, allmem);
|
||||
arc_set_limits(allmem);
|
||||
|
||||
#ifndef _KERNEL
|
||||
/*
|
||||
@@ -7648,6 +7653,8 @@ arc_init(void)
|
||||
if (arc_c < arc_c_min)
|
||||
arc_c = arc_c_min;
|
||||
|
||||
arc_register_hotplug();
|
||||
|
||||
arc_state_init();
|
||||
|
||||
buf_init();
|
||||
@@ -7656,8 +7663,9 @@ arc_init(void)
|
||||
offsetof(arc_prune_t, p_node));
|
||||
mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
arc_prune_taskq = taskq_create("arc_prune", boot_ncpus, defclsyspri,
|
||||
boot_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||
arc_prune_taskq = taskq_create("arc_prune", 100, defclsyspri,
|
||||
boot_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
|
||||
TASKQ_THREADS_CPU_PCT);
|
||||
|
||||
arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
|
||||
sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
|
||||
@@ -7754,6 +7762,8 @@ arc_fini(void)
|
||||
buf_fini();
|
||||
arc_state_fini();
|
||||
|
||||
arc_unregister_hotplug();
|
||||
|
||||
/*
|
||||
* We destroy the zthrs after all the ARC state has been
|
||||
* torn down to avoid the case of them receiving any
|
||||
|
||||
@@ -220,11 +220,12 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
|
||||
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
||||
dp->dp_zrele_taskq = taskq_create("z_zrele", boot_ncpus, defclsyspri,
|
||||
boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||
dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
|
||||
boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
|
||||
TASKQ_THREADS_CPU_PCT);
|
||||
dp->dp_unlinked_drain_taskq = taskq_create("z_unlinked_drain",
|
||||
boot_ncpus, defclsyspri, boot_ncpus, INT_MAX,
|
||||
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||
100, defclsyspri, boot_ncpus, INT_MAX,
|
||||
TASKQ_PREPOPULATE | TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
|
||||
|
||||
return (dp);
|
||||
}
|
||||
|
||||
@@ -96,9 +96,12 @@ multilist_create_impl(size_t size, size_t offset,
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a new multilist, using the default number of sublists
|
||||
* (the number of CPUs, or at least 4, or the tunable
|
||||
* zfs_multilist_num_sublists).
|
||||
* Allocate a new multilist, using the default number of sublists (the number
|
||||
* of CPUs, or at least 4, or the tunable zfs_multilist_num_sublists). Note
|
||||
* that the multilists do not expand if more CPUs are hot-added. In that case,
|
||||
* we will have less fanout than boot_ncpus, but we don't want to always
|
||||
* reserve the RAM necessary to create the extra slots for additional CPUs up
|
||||
* front, and dynamically adding them is a complex task.
|
||||
*/
|
||||
multilist_t *
|
||||
multilist_create(size_t size, size_t offset,
|
||||
|
||||
+4
-4
@@ -1281,15 +1281,15 @@ spa_activate(spa_t *spa, spa_mode_t mode)
|
||||
* pool traverse code from monopolizing the global (and limited)
|
||||
* system_taskq by inappropriately scheduling long running tasks on it.
|
||||
*/
|
||||
spa->spa_prefetch_taskq = taskq_create("z_prefetch", boot_ncpus,
|
||||
defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC);
|
||||
spa->spa_prefetch_taskq = taskq_create("z_prefetch", 100,
|
||||
defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
|
||||
|
||||
/*
|
||||
* The taskq to upgrade datasets in this pool. Currently used by
|
||||
* feature SPA_FEATURE_USEROBJ_ACCOUNTING/SPA_FEATURE_PROJECT_QUOTA.
|
||||
*/
|
||||
spa->spa_upgrade_taskq = taskq_create("z_upgrade", boot_ncpus,
|
||||
defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC);
|
||||
spa->spa_upgrade_taskq = taskq_create("z_upgrade", 100,
|
||||
defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC | TASKQ_THREADS_CPU_PCT);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
+3
-2
@@ -446,8 +446,9 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
|
||||
* Commit callback taskq hasn't been created yet.
|
||||
*/
|
||||
tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
|
||||
boot_ncpus, defclsyspri, boot_ncpus, boot_ncpus * 2,
|
||||
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||
100, defclsyspri, boot_ncpus, boot_ncpus * 2,
|
||||
TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
|
||||
TASKQ_THREADS_CPU_PCT);
|
||||
}
|
||||
|
||||
cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
|
||||
|
||||
Reference in New Issue
Block a user