Commit adaptive mutexes. This seems to have introduced some new

crashes but it's not clear to me yet if these are a problem with the mutex implementation or ZFSs usage of it. Minor taskq fixes to add new tasks to the end of the pending list. Minor enhansements to the debug infrastructure. git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@94 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
2026-05-23 10:54:35 +03:00 · 2008-05-05 20:18:49 +00:00
parent bcd68186d8
commit 9ab1ac14ad
9 changed files with 563 additions and 203 deletions
@@ -22,6 +22,7 @@ spl-objs += spl-kobj.o
 spl-objs += spl-module.o
 spl-objs += spl-generic.o
 spl-objs += spl-atomic.o
+spl-objs += spl-mutex.o

 splmodule := spl.ko
 splmoduledir := @kmoduledir@/kernel/lib/
@@ -2,6 +2,7 @@
 #include <sys/vmsystm.h>
 #include <sys/vnode.h>
 #include <sys/kmem.h>
+#include <sys/mutex.h>
 #include <sys/debug.h>
 #include <sys/proc.h>
 #include <linux/kmod.h>
@@ -99,21 +100,26 @@ static int __init spl_init(void)
 	if ((rc = kmem_init()))
 		GOTO(out , rc);

-	if ((rc = vn_init()))
-		GOTO(out2, rc);
+	if ((rc = spl_mutex_init()))
+		GOTO(out2 , rc);

-	if ((rc = proc_init()))
+	if ((rc = vn_init()))
 		GOTO(out3, rc);

+	if ((rc = proc_init()))
+		GOTO(out4, rc);
+
 	if ((rc = set_hostid()))
-		GOTO(out4, rc = -EADDRNOTAVAIL);
+		GOTO(out5, rc = -EADDRNOTAVAIL);

 	printk("SPL: Loaded Solaris Porting Layer v%s\n", VERSION);
 	RETURN(rc);
-out4:
+out5:
 	proc_fini();
-out3:
+out4:
 	vn_fini();
+out3:
+	spl_mutex_fini();
 out2:
 	kmem_fini();
 out:
@@ -0,0 +1,256 @@
+#include <sys/mutex.h>
+
+#ifdef DEBUG_SUBSYSTEM
+#undef DEBUG_SUBSYSTEM
+#endif
+
+#define DEBUG_SUBSYSTEM S_MUTEX
+
+/* Mutex implementation based on those found in Solaris.  This means
+ * they the MUTEX_DEFAULT type is an adaptive mutex.  When calling
+ * mutex_enter() your process will spin waiting for the lock if it's
+ * likely the lock will be free'd shortly.  If it looks like the
+ * lock will be held for a longer time we schedule and sleep waiting
+ * for it.  This determination is made by checking if the holder of
+ * the lock is currently running on cpu or sleeping waiting to be
+ * scheduled.  If the holder is currently running it's likely the
+ * lock will be shortly dropped.
+ *
+ * XXX: This is basically a rough implementation to see if this
+ * helps our performance.  If it does a more careful implementation
+ * should be done, perhaps in assembly.
+ */
+
+/*  0:         Never spin when trying to aquire lock
+ * -1:         Spin until aquired or holder yeilds without dropping lock
+ *  1-MAX_INT: Spin for N attempts before sleeping for lock
+ */
+int mutex_spin_max = 100;
+
+#ifdef DEBUG_MUTEX
+int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
+DEFINE_MUTEX(mutex_stats_lock);
+LIST_HEAD(mutex_stats_list);
+#endif
+
+void
+__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
+{
+	ASSERT(mp);
+	ASSERT(name);
+	ASSERT(ibc == NULL);
+	ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */
+
+	mp->km_magic = KM_MAGIC;
+	mp->km_owner = NULL;
+	mp->km_name = NULL;
+	mp->km_name_size = strlen(name) + 1;
+
+	switch (type) {
+		case MUTEX_DEFAULT:
+			mp->km_type = MUTEX_ADAPTIVE;
+			break;
+		case MUTEX_SPIN:
+		case MUTEX_ADAPTIVE:
+			mp->km_type = type;
+			break;
+		default:
+			SBUG();
+	}
+
+	/* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
+	mp->km_sem = kmem_alloc(sizeof(struct semaphore), KM_SLEEP);
+	if (mp->km_sem == NULL)
+		return;
+
+	mp->km_name = kmem_alloc(mp->km_name_size, KM_SLEEP);
+	if (mp->km_name == NULL) {
+		kmem_free(mp->km_sem, sizeof(struct semaphore));
+		return;
+	}
+
+	sema_init(mp->km_sem, 1);
+	strcpy(mp->km_name, name);
+
+#ifdef DEBUG_MUTEX
+	mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, KM_SLEEP);
+        if (mp->km_stats == NULL) {
+		kmem_free(mp->km_name, mp->km_name_size);
+		kmem_free(mp->km_sem, sizeof(struct semaphore));
+		return;
+	}
+
+	mutex_lock(&mutex_stats_lock);
+	list_add_tail(&mp->km_list, &mutex_stats_list);
+	mutex_unlock(&mutex_stats_lock);
+#endif
+}
+EXPORT_SYMBOL(__spl_mutex_init);
+
+void
+__spl_mutex_destroy(kmutex_t *mp)
+{
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+
+#ifdef DEBUG_MUTEX
+	mutex_lock(&mutex_stats_lock);
+	list_del_init(&mp->km_list);
+	mutex_unlock(&mutex_stats_lock);
+
+	kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
+#endif
+	kmem_free(mp->km_name, mp->km_name_size);
+	kmem_free(mp->km_sem, sizeof(struct semaphore));
+
+	memset(mp, KM_POISON, sizeof(*mp));
+}
+EXPORT_SYMBOL(__spl_mutex_destroy);
+
+/* Return 1 if we acquired the mutex, else zero.  */
+int
+__mutex_tryenter(kmutex_t *mp)
+{
+	int rc;
+	ENTRY;
+
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+	MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL);
+	MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL);
+
+	rc = down_trylock(mp->km_sem);
+	if (rc == 0) {
+		ASSERT(mp->km_owner == NULL);
+		mp->km_owner = current;
+		MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD);
+		MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD);
+	}
+
+	RETURN(!rc);
+}
+EXPORT_SYMBOL(__mutex_tryenter);
+
+static void
+mutex_enter_adaptive(kmutex_t *mp)
+{
+	struct task_struct *owner;
+	int count = 0;
+
+	/* Lock is not held so we expect to aquire the lock */
+	if ((owner = mp->km_owner) == NULL) {
+		down(mp->km_sem);
+		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD);
+		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD);
+	} else {
+		/* The lock is held by a currently running task which
+		 * we expect will drop the lock before leaving the
+		 * head of the runqueue.  So the ideal thing to do
+		 * is spin until we aquire the lock and avoid a
+		 * context switch.  However it is also possible the
+		 * task holding the lock yields the processor with
+		 * out dropping lock.  In which case, we know it's
+		 * going to be a while so we stop spinning and go
+		 * to sleep waiting for the lock to be available.
+		 * This should strike the optimum balance between
+		 * spinning and sleeping waiting for a lock.
+		 */
+		while (task_curr(owner) && (count <= mutex_spin_max)) {
+			if (down_trylock(mp->km_sem) == 0) {
+				MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
+				MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
+				GOTO(out, count);
+			}
+			count++;
+		}
+
+		/* The lock is held by a sleeping task so it's going to
+		 * cost us minimally one context switch.  We might as
+		 * well sleep and yield the processor to other tasks.
+		 */
+		down(mp->km_sem);
+		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP);
+		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP);
+	}
+out:
+	MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL);
+	MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL);
+}
+
+void
+__mutex_enter(kmutex_t *mp)
+{
+	ENTRY;
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+
+	switch (mp->km_type) {
+		case MUTEX_SPIN:
+			while (down_trylock(mp->km_sem));
+			MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
+			MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
+			break;
+		case MUTEX_ADAPTIVE:
+			mutex_enter_adaptive(mp);
+			break;
+	}
+
+	ASSERT(mp->km_owner == NULL);
+	mp->km_owner = current;
+
+	EXIT;
+}
+EXPORT_SYMBOL(__mutex_enter);
+
+void
+__mutex_exit(kmutex_t *mp)
+{
+	ENTRY;
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+	ASSERT(mp->km_owner == current);
+	mp->km_owner = NULL;
+	up(mp->km_sem);
+	EXIT;
+}
+EXPORT_SYMBOL(__mutex_exit);
+
+/* Return 1 if mutex is held by current process, else zero.  */
+int
+__mutex_owned(kmutex_t *mp)
+{
+	ENTRY;
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+	RETURN(mp->km_owner == current);
+}
+EXPORT_SYMBOL(__mutex_owned);
+
+/* Return owner if mutex is owned, else NULL.  */
+kthread_t *
+__spl_mutex_owner(kmutex_t *mp)
+{
+	ENTRY;
+	ASSERT(mp);
+	ASSERT(mp->km_magic == KM_MAGIC);
+	RETURN(mp->km_owner);
+}
+EXPORT_SYMBOL(__spl_mutex_owner);
+
+int
+spl_mutex_init(void)
+{
+	ENTRY;
+	RETURN(0);
+}
+
+void
+spl_mutex_fini(void)
+{
+        ENTRY;
+#ifdef DEBUG_MUTEX
+	ASSERT(list_empty(&mutex_stats_list));
+#endif
+        EXIT;
+}
+
@@ -3,8 +3,10 @@
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
 #include <linux/sysctl.h>
+#include <linux/seq_file.h>
 #include <sys/sysmacros.h>
 #include <sys/kmem.h>
+#include <sys/mutex.h>
 #include <sys/debug.h>
 #include "config.h"

@@ -18,10 +20,17 @@ static struct ctl_table_header *spl_header = NULL;
 static unsigned long table_min = 0;
 static unsigned long table_max = ~0;

-#define CTL_SPL 0x87
+#define CTL_SPL		0x87
+#define CTL_SPL_DEBUG	0x88
+#define CTL_SPL_MUTEX	0x89
+#define CTL_SPL_KMEM	0x90
+
 enum {
 	CTL_VERSION = 1,          /* Version */
-        CTL_DEBUG_SUBSYS,         /* Debug subsystem */
+	CTL_HOSTID,               /* Host id reported by /usr/bin/hostid */
+	CTL_HW_SERIAL,            /* Hardware serial number from hostid */
+
+	CTL_DEBUG_SUBSYS,         /* Debug subsystem */
        CTL_DEBUG_MASK,           /* Debug mask */
        CTL_DEBUG_PRINTK,         /* Force all messages to console */
        CTL_DEBUG_MB,             /* Debug buffer size */
@@ -31,19 +40,23 @@ enum {
        CTL_DEBUG_PATH,           /* Dump log location */
        CTL_DEBUG_DUMP,           /* Dump debug buffer to file */
        CTL_DEBUG_FORCE_BUG,      /* Hook to force a BUG */
-        CTL_CONSOLE_RATELIMIT,    /* Ratelimit console messages */
+        CTL_DEBUG_STACK_SIZE,     /* Max observed stack size */
+
+	CTL_CONSOLE_RATELIMIT,    /* Ratelimit console messages */
        CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */
        CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */
        CTL_CONSOLE_BACKOFF,      /* Delay increase factor */
-        CTL_STACK_SIZE,           /* Max observed stack size */
+
 #ifdef DEBUG_KMEM
        CTL_KMEM_KMEMUSED,        /* Crrently alloc'd kmem bytes */
        CTL_KMEM_KMEMMAX,         /* Max alloc'd by kmem bytes */
        CTL_KMEM_VMEMUSED,        /* Currently alloc'd vmem bytes */
        CTL_KMEM_VMEMMAX,         /* Max alloc'd by vmem bytes */
 #endif
-	CTL_HOSTID,               /* Host id reported by /usr/bin/hostid */
-	CTL_HW_SERIAL,            /* Hardware serial number from hostid */
+
+	CTL_MUTEX_STATS,          /* Global mutex statistics */
+	CTL_MUTEX_STATS_PER,      /* Per mutex statistics */
+	CTL_MUTEX_SPIN_MAX,       /* Maximum mutex spin iterations */
 };

 static int
@@ -368,21 +381,107 @@ proc_dohostid(struct ctl_table *table, int write, struct file *filp,
        RETURN(rc);
 }

-static struct ctl_table spl_table[] = {
-        /* NB No .strategy entries have been provided since
-         * sysctl(8) prefers to go via /proc for portability.
-         */
-        {
-                .ctl_name = CTL_VERSION,
-                .procname = "version",
-                .data     = spl_version,
-                .maxlen   = sizeof(spl_version),
-                .mode     = 0444,
-                .proc_handler = &proc_dostring,
-        },
+#ifdef DEBUG_MUTEX
+static void
+mutex_seq_show_headers(struct seq_file *f)
+{
+        seq_printf(f, "%-36s %-4s %-16s\t"
+                   "e_tot\te_nh\te_sp\te_sl\tte_tot\tte_nh\n",
+		   "name", "type", "owner");
+}
+
+static int
+mutex_seq_show(struct seq_file *f, void *p)
+{
+        kmutex_t *mp = p;
+	char t = 'X';
+        int i;
+
+	ASSERT(mp->km_magic == KM_MAGIC);
+
+	switch (mp->km_type) {
+		case MUTEX_DEFAULT:	t = 'D';	break;
+		case MUTEX_SPIN:	t = 'S';	break;
+		case MUTEX_ADAPTIVE:	t = 'A';	break;
+		default:
+			SBUG();
+	}
+        seq_printf(f, "%-36s %c    ", mp->km_name, t);
+	if (mp->km_owner)
+                seq_printf(f, "%p\t", mp->km_owner);
+	else
+                seq_printf(f, "%-16s\t", "<not held>");
+
+        for (i = 0; i < MUTEX_STATS_SIZE; i++)
+                seq_printf(f, "%d%c", mp->km_stats[i],
+                           (i + 1 == MUTEX_STATS_SIZE) ? '\n' : '\t');
+
+        return 0;
+}
+
+static void *
+mutex_seq_start(struct seq_file *f, loff_t *pos)
+{
+        struct list_head *p;
+        loff_t n = *pos;
+        ENTRY;
+
+        mutex_lock(&mutex_stats_lock);
+        if (!n)
+                mutex_seq_show_headers(f);
+
+        p = mutex_stats_list.next;
+        while (n--) {
+                p = p->next;
+                if (p == &mutex_stats_list)
+                        RETURN(NULL);
+        }
+
+        RETURN(list_entry(p, kmutex_t, km_list));
+}
+
+static void *
+mutex_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+	kmutex_t *mp = p;
+        ENTRY;
+
+        ++*pos;
+        RETURN((mp->km_list.next == &mutex_stats_list) ?
+	       NULL : list_entry(mp->km_list.next, kmutex_t, km_list));
+}
+
+static void
+mutex_seq_stop(struct seq_file *f, void *v)
+{
+        mutex_unlock(&mutex_stats_lock);
+}
+
+static struct seq_operations mutex_seq_ops = {
+        .show  = mutex_seq_show,
+        .start = mutex_seq_start,
+        .next  = mutex_seq_next,
+        .stop  = mutex_seq_stop,
+};
+
+static int
+proc_mutex_open(struct inode *inode, struct file *filp)
+{
+        return seq_open(filp, &mutex_seq_ops);
+}
+
+static struct file_operations proc_mutex_operations = {
+        .open           = proc_mutex_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = seq_release,
+};
+#endif /* DEBUG_MUTEX */
+
+static struct ctl_table spl_debug_table[] = {
        {
                .ctl_name = CTL_DEBUG_SUBSYS,
-                .procname = "debug_subsystem",
+                .procname = "subsystem",
                .data     = &spl_debug_subsys,
                .maxlen   = sizeof(unsigned long),
                .mode     = 0644,
@@ -390,7 +489,7 @@ static struct ctl_table spl_table[] = {
        },
        {
                .ctl_name = CTL_DEBUG_MASK,
-                .procname = "debug_mask",
+                .procname = "mask",
                .data     = &spl_debug_mask,
                .maxlen   = sizeof(unsigned long),
                .mode     = 0644,
@@ -398,7 +497,7 @@ static struct ctl_table spl_table[] = {
        },
        {
                .ctl_name = CTL_DEBUG_PRINTK,
-                .procname = "debug_printk",
+                .procname = "printk",
                .data     = &spl_debug_printk,
                .maxlen   = sizeof(unsigned long),
                .mode     = 0644,
@@ -406,13 +505,13 @@ static struct ctl_table spl_table[] = {
        },
        {
                .ctl_name = CTL_DEBUG_MB,
-                .procname = "debug_mb",
+                .procname = "mb",
                .mode     = 0644,
                .proc_handler = &proc_debug_mb,
        },
        {
                .ctl_name = CTL_DEBUG_BINARY,
-                .procname = "debug_binary",
+                .procname = "binary",
                .data     = &spl_debug_binary,
                .maxlen   = sizeof(int),
                .mode     = 0644,
@@ -436,7 +535,7 @@ static struct ctl_table spl_table[] = {
        },
        {
                .ctl_name = CTL_DEBUG_PATH,
-                .procname = "debug_path",
+                .procname = "path",
                .data     = spl_debug_file_path,
                .maxlen   = sizeof(spl_debug_file_path),
                .mode     = 0644,
@@ -444,7 +543,7 @@ static struct ctl_table spl_table[] = {
        },
        {
                .ctl_name = CTL_DEBUG_DUMP,
-                .procname = "debug_dump",
+                .procname = "dump",
                .mode     = 0200,
                .proc_handler = &proc_dump_kernel,
        },
@@ -483,14 +582,40 @@ static struct ctl_table spl_table[] = {
                .proc_handler = &proc_console_backoff,
        },
        {
-                .ctl_name = CTL_STACK_SIZE,
+                .ctl_name = CTL_DEBUG_STACK_SIZE,
                .procname = "stack_max",
                .data     = &spl_debug_stack,
                .maxlen   = sizeof(int),
                .mode     = 0444,
                .proc_handler = &proc_dointvec,
        },
+	{0},
+};
+
+#ifdef DEBUG_MUTEX
+static struct ctl_table spl_mutex_table[] = {
+        {
+                .ctl_name = CTL_MUTEX_STATS,
+                .procname = "stats",
+                .data     = &mutex_stats,
+                .maxlen   = sizeof(int) * MUTEX_STATS_SIZE,
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec,
+        },
+        {
+                .ctl_name = CTL_MUTEX_SPIN_MAX,
+                .procname = "spin_max",
+                .data     = &mutex_spin_max,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec,
+        },
+	{0},
+};
+#endif /* DEBUG_MUTEX */
+
 #ifdef DEBUG_KMEM
+static struct ctl_table spl_kmem_table[] = {
        {
                .ctl_name = CTL_KMEM_KMEMUSED,
                .procname = "kmem_used",
@@ -527,7 +652,22 @@ static struct ctl_table spl_table[] = {
                .mode     = 0444,
                .proc_handler = &proc_doulongvec_minmax,
        },
-#endif
+	{0},
+};
+#endif /* DEBUG_MUTEX */
+
+static struct ctl_table spl_table[] = {
+        /* NB No .strategy entries have been provided since
+         * sysctl(8) prefers to go via /proc for portability.
+         */
+        {
+                .ctl_name = CTL_VERSION,
+                .procname = "version",
+                .data     = spl_version,
+                .maxlen   = sizeof(spl_version),
+                .mode     = 0444,
+                .proc_handler = &proc_dostring,
+        },
        {
                .ctl_name = CTL_HOSTID,
                .procname = "hostid",
@@ -544,10 +684,32 @@ static struct ctl_table spl_table[] = {
                .mode     = 0444,
                .proc_handler = &proc_dostring,
        },
+	{
+		.ctl_name = CTL_SPL_DEBUG,
+		.procname = "debug",
+		.mode     = 0555,
+		.child    = spl_debug_table,
+	},
+#ifdef DEBUG_MUTEX
+	{
+		.ctl_name = CTL_SPL_MUTEX,
+		.procname = "mutex",
+		.mode     = 0555,
+		.child    = spl_mutex_table,
+	},
+#endif
+#ifdef DEBUG_KMEM
+	{
+		.ctl_name = CTL_SPL_KMEM,
+		.procname = "kmem",
+		.mode     = 0555,
+		.child    = spl_kmem_table,
+	},
+#endif
        { 0 },
 };

-static struct ctl_table spl_dir_table[] = {
+static struct ctl_table spl_dir[] = {
        {
                .ctl_name = CTL_SPL,
                .procname = "spl",
@@ -563,9 +725,22 @@ proc_init(void)
        ENTRY;

 #ifdef CONFIG_SYSCTL
-        spl_header = register_sysctl_table(spl_dir_table, 0);
+        spl_header = register_sysctl_table(spl_dir, 0);
 	if (spl_header == NULL)
 		RETURN(-EUNATCH);
+
+#ifdef DEBUG_MUTEX
+	{
+                struct proc_dir_entry *entry = create_proc_entry("mutex_stats",
+								 0444, NULL);
+                if (entry) {
+                        entry->proc_fops = &proc_mutex_operations;
+                } else {
+                        unregister_sysctl_table(spl_header);
+                        RETURN(-EUNATCH);
+                }
+	}
+#endif /* DEBUG_MUTEX */
 #endif
        RETURN(0);
 }
@@ -577,6 +752,7 @@ proc_fini(void)

 #ifdef CONFIG_SYSCTL
        ASSERT(spl_header != NULL);
+        remove_proc_entry("mutex_stats", NULL);
        unregister_sysctl_table(spl_header);
 #endif
        EXIT;
@@ -106,7 +106,7 @@ task_done(taskq_t *tq, task_t *t)
 		t->t_id = 0;
 		t->t_func = NULL;
 		t->t_arg = NULL;
-                list_add(&t->t_list, &tq->tq_free_list);
+                list_add_tail(&t->t_list, &tq->tq_free_list);
 	} else {
 		task_free(tq, t);
 	}
@@ -209,7 +209,7 @@ __taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)


 	spin_lock(&t->t_lock);
-	list_add(&t->t_list, &tq->tq_pend_list);
+	list_add_tail(&t->t_list, &tq->tq_pend_list);
 	t->t_id = rc = tq->tq_next_id;
 	tq->tq_next_id++;
        t->t_func = func;
@@ -282,7 +282,7 @@ taskq_thread(void *args)
                if (!list_empty(&tq->tq_pend_list)) {
                        t = list_entry(tq->tq_pend_list.next, task_t, t_list);
                        list_del_init(&t->t_list);
-			list_add(&t->t_list, &tq->tq_work_list);
+			list_add_tail(&t->t_list, &tq->tq_work_list);
                        tq->tq_nactive++;
 			spin_unlock_irq(&tq->tq_lock);