Illumos #734: Use taskq_dispatch_ent() interface

It has been observed that some of the hottest locks are those
of the zio taskqs.  Contention on these locks can limit the
rate at which zios are dispatched which limits performance.

This upstream change from Illumos uses new interface to the
taskqs which allow them to utilize a prealloc'ed taskq_ent_t.
This removes the need to perform an allocation at dispatch
time while holding the contended lock.  This has the effect
of improving system performance.

Reviewed by: Albert Lee <trisk@nexenta.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Alexey Zaytsev <alexey.zaytsev@nexenta.com>
Reviewed by: Jason Brian King <jason.brian.king@gmail.com>
Reviewed by: George Wilson <gwilson@zfsmail.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Approved by: Gordon Ross <gwr@nexenta.com>

References to Illumos issue:
  https://www.illumos.org/issues/734

Ported-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #482
This commit is contained in:
Garrett D'Amore
2011-11-07 16:26:52 -08:00
committed by Brian Behlendorf
parent 30a9524e45
commit a38718a63d
5 changed files with 134 additions and 41 deletions
+91 -35
View File
@@ -22,19 +22,15 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/zfs_context.h>
int taskq_now;
taskq_t *system_taskq;
typedef struct task {
struct task *task_next;
struct task *task_prev;
task_func_t *task_func;
void *task_arg;
} task_t;
#define TASKQ_ACTIVE 0x00010000
struct taskq {
@@ -51,18 +47,19 @@ struct taskq {
int tq_maxalloc;
kcondvar_t tq_maxalloc_cv;
int tq_maxalloc_wait;
task_t *tq_freelist;
task_t tq_task;
taskq_ent_t *tq_freelist;
taskq_ent_t tq_task;
};
static task_t *
static taskq_ent_t *
task_alloc(taskq_t *tq, int tqflags)
{
task_t *t;
taskq_ent_t *t;
int rv;
again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
tq->tq_freelist = t->task_next;
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
tq->tq_freelist = t->tqent_next;
} else {
if (tq->tq_nalloc >= tq->tq_maxalloc) {
if (!(tqflags & KM_SLEEP))
@@ -87,25 +84,28 @@ again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
}
mutex_exit(&tq->tq_lock);
t = kmem_alloc(sizeof (task_t), tqflags);
t = kmem_alloc(sizeof (taskq_ent_t), tqflags);
mutex_enter(&tq->tq_lock);
if (t != NULL)
if (t != NULL) {
/* Make sure we start without any flags */
t->tqent_flags = 0;
tq->tq_nalloc++;
}
}
return (t);
}
static void
task_free(taskq_t *tq, task_t *t)
task_free(taskq_t *tq, taskq_ent_t *t)
{
if (tq->tq_nalloc <= tq->tq_minalloc) {
t->task_next = tq->tq_freelist;
t->tqent_next = tq->tq_freelist;
tq->tq_freelist = t;
} else {
tq->tq_nalloc--;
mutex_exit(&tq->tq_lock);
kmem_free(t, sizeof (task_t));
kmem_free(t, sizeof (taskq_ent_t));
mutex_enter(&tq->tq_lock);
}
@@ -116,7 +116,7 @@ task_free(taskq_t *tq, task_t *t)
taskqid_t
taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
{
task_t *t;
taskq_ent_t *t;
if (taskq_now) {
func(arg);
@@ -130,26 +130,77 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
return (0);
}
if (tqflags & TQ_FRONT) {
t->task_next = tq->tq_task.task_next;
t->task_prev = &tq->tq_task;
t->tqent_next = tq->tq_task.tqent_next;
t->tqent_prev = &tq->tq_task;
} else {
t->task_next = &tq->tq_task;
t->task_prev = tq->tq_task.task_prev;
t->tqent_next = &tq->tq_task;
t->tqent_prev = tq->tq_task.tqent_prev;
}
t->task_next->task_prev = t;
t->task_prev->task_next = t;
t->task_func = func;
t->task_arg = arg;
t->tqent_next->tqent_prev = t;
t->tqent_prev->tqent_next = t;
t->tqent_func = func;
t->tqent_arg = arg;
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
cv_signal(&tq->tq_dispatch_cv);
mutex_exit(&tq->tq_lock);
return (1);
}
int
taskq_empty_ent(taskq_ent_t *t)
{
return t->tqent_next == NULL;
}
void
taskq_init_ent(taskq_ent_t *t)
{
t->tqent_next = NULL;
t->tqent_prev = NULL;
t->tqent_func = NULL;
t->tqent_arg = NULL;
t->tqent_flags = 0;
}
void
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
taskq_ent_t *t)
{
ASSERT(func != NULL);
ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
/*
* Mark it as a prealloc'd task. This is important
* to ensure that we don't free it later.
*/
t->tqent_flags |= TQENT_FLAG_PREALLOC;
/*
* Enqueue the task to the underlying queue.
*/
mutex_enter(&tq->tq_lock);
if (flags & TQ_FRONT) {
t->tqent_next = tq->tq_task.tqent_next;
t->tqent_prev = &tq->tq_task;
} else {
t->tqent_next = &tq->tq_task;
t->tqent_prev = tq->tq_task.tqent_prev;
}
t->tqent_next->tqent_prev = t;
t->tqent_prev->tqent_next = t;
t->tqent_func = func;
t->tqent_arg = arg;
cv_signal(&tq->tq_dispatch_cv);
mutex_exit(&tq->tq_lock);
}
void
taskq_wait(taskq_t *tq)
{
mutex_enter(&tq->tq_lock);
while (tq->tq_task.task_next != &tq->tq_task || tq->tq_active != 0)
while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0)
cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
mutex_exit(&tq->tq_lock);
}
@@ -158,27 +209,32 @@ static void
taskq_thread(void *arg)
{
taskq_t *tq = arg;
task_t *t;
taskq_ent_t *t;
boolean_t prealloc;
mutex_enter(&tq->tq_lock);
while (tq->tq_flags & TASKQ_ACTIVE) {
if ((t = tq->tq_task.task_next) == &tq->tq_task) {
if ((t = tq->tq_task.tqent_next) == &tq->tq_task) {
if (--tq->tq_active == 0)
cv_broadcast(&tq->tq_wait_cv);
cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
tq->tq_active++;
continue;
}
t->task_prev->task_next = t->task_next;
t->task_next->task_prev = t->task_prev;
t->tqent_prev->tqent_next = t->tqent_next;
t->tqent_next->tqent_prev = t->tqent_prev;
t->tqent_next = NULL;
t->tqent_prev = NULL;
prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC;
mutex_exit(&tq->tq_lock);
rw_enter(&tq->tq_threadlock, RW_READER);
t->task_func(t->task_arg);
t->tqent_func(t->tqent_arg);
rw_exit(&tq->tq_threadlock);
mutex_enter(&tq->tq_lock);
task_free(tq, t);
if (!prealloc)
task_free(tq, t);
}
tq->tq_nthreads--;
cv_broadcast(&tq->tq_wait_cv);
@@ -217,8 +273,8 @@ taskq_create(const char *name, int nthreads, pri_t pri,
tq->tq_nthreads = nthreads;
tq->tq_minalloc = minalloc;
tq->tq_maxalloc = maxalloc;
tq->tq_task.task_next = &tq->tq_task;
tq->tq_task.task_prev = &tq->tq_task;
tq->tq_task.tqent_next = &tq->tq_task;
tq->tq_task.tqent_prev = &tq->tq_task;
tq->tq_threadlist = kmem_alloc(nthreads*sizeof(kthread_t *), KM_SLEEP);
if (flags & TASKQ_PREPOPULATE) {