diff --git a/include/os/linux/spl/sys/taskq.h b/include/os/linux/spl/sys/taskq.h index 2a6cd8283..6c1b4377a 100644 --- a/include/os/linux/spl/sys/taskq.h +++ b/include/os/linux/spl/sys/taskq.h @@ -104,6 +104,7 @@ typedef struct taskq { /* list node for the cpu hotplug callback */ struct hlist_node tq_hp_cb_node; boolean_t tq_hp_support; + unsigned long lastshouldstop; /* when to purge dynamic */ } taskq_t; typedef struct taskq_ent { diff --git a/man/man4/spl.4 b/man/man4/spl.4 index 02efaf16d..82455fb53 100644 --- a/man/man4/spl.4 +++ b/man/man4/spl.4 @@ -193,4 +193,19 @@ The proc file will walk the lists with lock held, reading it could cause a lock-up if the list grow too large without limiting the output. "(truncated)" will be shown if the list is larger than the limit. +. +.It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 10000 Pq uint +(Linux-only) +How long a taskq has to have had no work before we tear down its dynamic worker threads. +Previously, we would tear down a dynamic taskq worker as soon +as we noticed it had no work, but it was observed that this led +to a lot of churn in tearing down things we then immediately +spawned anew. +In practice, it seems any nonzero value will remove the vast +majority of this churn, while the nontrivially larger value +was chosen to help filter out the little remaining churn on +a mostly idle system. +Setting this value to +.Sy 0 +will revert to the previous behavior. 
.El diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index 84497359c..d18f935b1 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -36,6 +36,12 @@ static int spl_taskq_thread_bind = 0; module_param(spl_taskq_thread_bind, int, 0644); MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default"); +static uint_t spl_taskq_thread_timeout_ms = 10000; +/* BEGIN CSTYLED */ +module_param(spl_taskq_thread_timeout_ms, uint, 0644); +/* END CSTYLED */ +MODULE_PARM_DESC(spl_taskq_thread_timeout_ms, + "Time to require a dynamic thread be idle before it gets cleaned up"); static int spl_taskq_thread_dynamic = 1; module_param(spl_taskq_thread_dynamic, int, 0444); @@ -848,12 +854,37 @@ taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt) tqt_thread_list) == tqt) return (0); - return + int no_work = ((tq->tq_nspawn == 0) && /* No threads are being spawned */ (tq->tq_nactive == 0) && /* No threads are handling tasks */ (tq->tq_nthreads > 1) && /* More than 1 thread is running */ (!taskq_next_ent(tq)) && /* There are no pending tasks */ (spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */ + + /* + * If we would have said stop before, let's instead wait a bit, maybe + * we'll see more work come our way soon... + */ + if (no_work) { + /* if it's 0, we want the old behavior. */ + /* if the taskq is being torn down, we also want to go away. 
*/ + if (spl_taskq_thread_timeout_ms == 0 || + !(tq->tq_flags & TASKQ_ACTIVE)) + return (1); + unsigned long lasttime = tq->lastshouldstop; + if (lasttime > 0) { + if (time_after(jiffies, lasttime + + msecs_to_jiffies(spl_taskq_thread_timeout_ms))) + return (1); + else + return (0); + } else { + tq->lastshouldstop = jiffies; + } + } else { + tq->lastshouldstop = 0; + } + return (0); } static int @@ -1091,6 +1122,7 @@ taskq_create(const char *name, int threads_arg, pri_t pri, tq->tq_flags = (flags | TASKQ_ACTIVE); tq->tq_next_id = TASKQID_INITIAL; tq->tq_lowest_id = TASKQID_INITIAL; + tq->lastshouldstop = 0; INIT_LIST_HEAD(&tq->tq_free_list); INIT_LIST_HEAD(&tq->tq_pend_list); INIT_LIST_HEAD(&tq->tq_prio_list);