Commit adaptive mutexes. This seems to have introduced some new

crashes but it's not clear to me yet if these are a problem with
the mutex implementation or ZFS's usage of it.

Minor taskq fixes to add new tasks to the end of the pending list.

Minor enhancements to the debug infrastructure.



git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@94 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
This commit is contained in:
behlendo
2008-05-05 20:18:49 +00:00
parent bcd68186d8
commit 9ab1ac14ad
9 changed files with 563 additions and 203 deletions
+1
View File
@@ -22,6 +22,7 @@ spl-objs += spl-kobj.o
spl-objs += spl-module.o
spl-objs += spl-generic.o
spl-objs += spl-atomic.o
spl-objs += spl-mutex.o
splmodule := spl.ko
splmoduledir := @kmoduledir@/kernel/lib/
+12 -6
View File
@@ -2,6 +2,7 @@
#include <sys/vmsystm.h>
#include <sys/vnode.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/debug.h>
#include <sys/proc.h>
#include <linux/kmod.h>
@@ -99,21 +100,26 @@ static int __init spl_init(void)
if ((rc = kmem_init()))
GOTO(out , rc);
if ((rc = vn_init()))
GOTO(out2, rc);
if ((rc = spl_mutex_init()))
GOTO(out2 , rc);
if ((rc = proc_init()))
if ((rc = vn_init()))
GOTO(out3, rc);
if ((rc = proc_init()))
GOTO(out4, rc);
if ((rc = set_hostid()))
GOTO(out4, rc = -EADDRNOTAVAIL);
GOTO(out5, rc = -EADDRNOTAVAIL);
printk("SPL: Loaded Solaris Porting Layer v%s\n", VERSION);
RETURN(rc);
out4:
out5:
proc_fini();
out3:
out4:
vn_fini();
out3:
spl_mutex_fini();
out2:
kmem_fini();
out:
+256
View File
@@ -0,0 +1,256 @@
#include <sys/mutex.h>
#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif
#define DEBUG_SUBSYSTEM S_MUTEX
/* Mutex implementation based on those found in Solaris. This means
* that the MUTEX_DEFAULT type is an adaptive mutex. When calling
* mutex_enter() your process will spin waiting for the lock if it's
* likely the lock will be freed shortly. If it looks like the
* lock will be held for a longer time we schedule and sleep waiting
* for it. This determination is made by checking if the holder of
* the lock is currently running on cpu or sleeping waiting to be
* scheduled. If the holder is currently running it's likely the
* lock will be shortly dropped.
*
* XXX: This is basically a rough implementation to see if this
* helps our performance. If it does a more careful implementation
* should be done, perhaps in assembly.
*/
/* Tunable controlling how long mutex_enter() spins on an adaptive mutex:
 *  0: Never spin when trying to acquire lock
 * -1: Spin until acquired or holder yields without dropping lock
 *  1-MAX_INT: Spin for N attempts before sleeping for lock
 */
int mutex_spin_max = 100;
#ifdef DEBUG_MUTEX
/* Global counters, bumped via MUTEX_STAT_INC(mutex_stats, ...) */
int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
/* Held around every insertion into, removal from, and walk of
 * mutex_stats_list. */
DEFINE_MUTEX(mutex_stats_lock);
/* Every initialized kmutex_t is linked here through its km_list member */
LIST_HEAD(mutex_stats_list);
#endif
/*
 * Initialize the mutex 'mp'.
 *
 * mp   - mutex to initialize; must not already carry KM_MAGIC.
 * name - NUL terminated name, copied into a private allocation.
 * type - MUTEX_DEFAULT (mapped to MUTEX_ADAPTIVE), MUTEX_SPIN or
 *        MUTEX_ADAPTIVE; anything else triggers SBUG().
 * ibc  - unused, must be NULL.
 *
 * If any allocation fails the function returns without marking the
 * mutex valid: km_magic is left untouched and every pointer that was
 * released is reset to NULL.  This way later users fail the KM_MAGIC
 * assertion instead of touching freed memory.
 */
void
__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
{
	ASSERT(mp);
	ASSERT(name);
	ASSERT(ibc == NULL);
	ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */

	mp->km_owner = NULL;
	mp->km_name = NULL;
	mp->km_name_size = strlen(name) + 1;

	switch (type) {
	case MUTEX_DEFAULT:
		mp->km_type = MUTEX_ADAPTIVE;
		break;
	case MUTEX_SPIN:
	case MUTEX_ADAPTIVE:
		mp->km_type = type;
		break;
	default:
		SBUG();
	}

	/* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
	mp->km_sem = kmem_alloc(sizeof(struct semaphore), KM_SLEEP);
	if (mp->km_sem == NULL)
		return;

	mp->km_name = kmem_alloc(mp->km_name_size, KM_SLEEP);
	if (mp->km_name == NULL) {
		kmem_free(mp->km_sem, sizeof(struct semaphore));
		mp->km_sem = NULL;
		return;
	}

	sema_init(mp->km_sem, 1);
	/* Destination was sized as strlen(name) + 1 above, so this fits */
	strcpy(mp->km_name, name);

#ifdef DEBUG_MUTEX
	mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, KM_SLEEP);
	if (mp->km_stats == NULL) {
		kmem_free(mp->km_name, mp->km_name_size);
		mp->km_name = NULL;
		kmem_free(mp->km_sem, sizeof(struct semaphore));
		mp->km_sem = NULL;
		return;
	}
#endif

	/* Only now is the mutex fully constructed; mark it valid before
	 * it becomes visible on the stats list. */
	mp->km_magic = KM_MAGIC;

#ifdef DEBUG_MUTEX
	mutex_lock(&mutex_stats_lock);
	list_add_tail(&mp->km_list, &mutex_stats_list);
	mutex_unlock(&mutex_stats_lock);
#endif
}
EXPORT_SYMBOL(__spl_mutex_init);
/*
 * Tear down a mutex set up by __spl_mutex_init(): unlink it from the
 * stats list (DEBUG_MUTEX only), free its stats, name and semaphore,
 * and overwrite the structure with KM_POISON.
 */
void
__spl_mutex_destroy(kmutex_t *mp)
{
ASSERT(mp);
ASSERT(mp->km_magic == KM_MAGIC);
#ifdef DEBUG_MUTEX
/* Unlink under the same lock the stats seq_file walker holds */
mutex_lock(&mutex_stats_lock);
list_del_init(&mp->km_list);
mutex_unlock(&mutex_stats_lock);
kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
#endif
kmem_free(mp->km_name, mp->km_name_size);
kmem_free(mp->km_sem, sizeof(struct semaphore));
/* Fill the whole struct (km_magic included) with the poison byte;
 * presumably this makes later use trip the KM_MAGIC assertions --
 * NOTE(review): confirm the KM_POISON pattern differs from KM_MAGIC. */
memset(mp, KM_POISON, sizeof(*mp));
}
EXPORT_SYMBOL(__spl_mutex_destroy);
/*
 * Try to take the mutex without blocking.
 * Returns 1 when the mutex was acquired by the caller, 0 when it is
 * already held.
 */
int
__mutex_tryenter(kmutex_t *mp)
{
	int busy;
	ENTRY;

	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);
	MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL);
	MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL);

	/* down_trylock() yields zero only when the semaphore was taken */
	busy = down_trylock(mp->km_sem);
	if (busy)
		RETURN(0);

	/* We hold the semaphore now, record ourselves as the owner */
	ASSERT(mp->km_owner == NULL);
	mp->km_owner = current;
	MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD);
	MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD);

	RETURN(1);
}
EXPORT_SYMBOL(__mutex_tryenter);
/*
 * Acquire an adaptive mutex.  On return the caller holds mp->km_sem;
 * recording the owner is left to the caller (__mutex_enter).
 *
 * Spinning is bounded by the mutex_spin_max tunable:
 *    0        never spin, go straight to sleep
 *    negative spin for as long as the holder stays on a cpu
 *    N > 0    make at most N trylock attempts before sleeping
 */
static void
mutex_enter_adaptive(kmutex_t *mp)
{
	struct task_struct *owner;
	/* Snapshot the tunable once: it can be rewritten through sysctl
	 * while we are spinning. */
	int spin_max = mutex_spin_max;
	int count = 0;

	/* Lock is not held so we expect to acquire the lock */
	if ((owner = mp->km_owner) == NULL) {
		down(mp->km_sem);
		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD);
		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD);
	} else {
		/* The lock is held by a currently running task which
		 * we expect will drop the lock before leaving the
		 * head of the runqueue.  So the ideal thing to do
		 * is spin until we acquire the lock and avoid a
		 * context switch.  However it is also possible the
		 * task holding the lock yields the processor with
		 * out dropping lock.  In which case, we know it's
		 * going to be a while so we stop spinning and go
		 * to sleep waiting for the lock to be available.
		 * This should strike the optimum balance between
		 * spinning and sleeping waiting for a lock.
		 */
		while (task_curr(owner) &&
		       (spin_max < 0 || count < spin_max)) {
			if (down_trylock(mp->km_sem) == 0) {
				MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
				MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
				GOTO(out, count);
			}

			/* Only count bounded spins so an unbounded spin
			 * can never overflow the signed counter. */
			if (spin_max > 0)
				count++;
		}

		/* The lock is held by a sleeping task so it's going to
		 * cost us minimally one context switch.  We might as
		 * well sleep and yield the processor to other tasks.
		 */
		down(mp->km_sem);
		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP);
		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP);
	}
out:
	MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL);
	MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL);
}
/*
 * Acquire the mutex, blocking or spinning as dictated by its type,
 * and record the current task as the owner.
 *
 * MUTEX_SPIN mutexes busy-wait on the semaphore; MUTEX_ADAPTIVE
 * mutexes are handled by mutex_enter_adaptive().  Any other type means
 * the mutex was never initialized properly (or was corrupted), which
 * is treated as a bug rather than silently claiming ownership without
 * holding the semaphore.
 */
void
__mutex_enter(kmutex_t *mp)
{
	ENTRY;
	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);

	switch (mp->km_type) {
	case MUTEX_SPIN:
		/* Busy-wait until the semaphore is taken */
		while (down_trylock(mp->km_sem))
			;
		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
		break;
	case MUTEX_ADAPTIVE:
		mutex_enter_adaptive(mp);
		break;
	default:
		/* __spl_mutex_init() only ever stores the two types above */
		SBUG();
	}

	/* __mutex_exit() clears km_owner before releasing the semaphore,
	 * so it must be NULL once we hold it. */
	ASSERT(mp->km_owner == NULL);
	mp->km_owner = current;
	EXIT;
}
EXPORT_SYMBOL(__mutex_enter);
/* Release a mutex; asserts that the current task is the recorded owner. */
void
__mutex_exit(kmutex_t *mp)
{
ENTRY;
ASSERT(mp);
ASSERT(mp->km_magic == KM_MAGIC);
ASSERT(mp->km_owner == current);
/* Clear the owner before up(): the next task to take the semaphore
 * asserts km_owner == NULL in __mutex_enter()/__mutex_tryenter(). */
mp->km_owner = NULL;
up(mp->km_sem);
EXIT;
}
EXPORT_SYMBOL(__mutex_exit);
/* Return 1 if mutex is held by current process, else zero.
 * Only compares the recorded km_owner against 'current'; the semaphore
 * itself is not inspected. */
int
__mutex_owned(kmutex_t *mp)
{
ENTRY;
ASSERT(mp);
ASSERT(mp->km_magic == KM_MAGIC);
RETURN(mp->km_owner == current);
}
EXPORT_SYMBOL(__mutex_owned);
/* Return owner if mutex is owned, else NULL.
 * The value is read from km_owner without taking any lock, so it is
 * only a snapshot of the owner at the time of the call. */
kthread_t *
__spl_mutex_owner(kmutex_t *mp)
{
ENTRY;
ASSERT(mp);
ASSERT(mp->km_magic == KM_MAGIC);
RETURN(mp->km_owner);
}
EXPORT_SYMBOL(__spl_mutex_owner);
/* Module-load hook for the mutex subsystem (called from spl_init()).
 * Performs no setup at present and always returns 0. */
int
spl_mutex_init(void)
{
ENTRY;
RETURN(0);
}
/* Module-unload hook for the mutex subsystem.  With DEBUG_MUTEX it
 * asserts that every mutex has been destroyed, i.e. that the stats
 * list is empty; otherwise it does nothing. */
void
spl_mutex_fini(void)
{
ENTRY;
#ifdef DEBUG_MUTEX
ASSERT(list_empty(&mutex_stats_list));
#endif
EXIT;
}
+205 -29
View File
@@ -3,8 +3,10 @@
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/debug.h>
#include "config.h"
@@ -18,10 +20,17 @@ static struct ctl_table_header *spl_header = NULL;
static unsigned long table_min = 0;
static unsigned long table_max = ~0;
#define CTL_SPL 0x87
#define CTL_SPL 0x87
#define CTL_SPL_DEBUG 0x88
#define CTL_SPL_MUTEX 0x89
#define CTL_SPL_KMEM 0x90
enum {
CTL_VERSION = 1, /* Version */
CTL_DEBUG_SUBSYS, /* Debug subsystem */
CTL_HOSTID, /* Host id reported by /usr/bin/hostid */
CTL_HW_SERIAL, /* Hardware serial number from hostid */
CTL_DEBUG_SUBSYS, /* Debug subsystem */
CTL_DEBUG_MASK, /* Debug mask */
CTL_DEBUG_PRINTK, /* Force all messages to console */
CTL_DEBUG_MB, /* Debug buffer size */
@@ -31,19 +40,23 @@ enum {
CTL_DEBUG_PATH, /* Dump log location */
CTL_DEBUG_DUMP, /* Dump debug buffer to file */
CTL_DEBUG_FORCE_BUG, /* Hook to force a BUG */
CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */
CTL_DEBUG_STACK_SIZE, /* Max observed stack size */
CTL_CONSOLE_RATELIMIT, /* Ratelimit console messages */
CTL_CONSOLE_MAX_DELAY_CS, /* Max delay at which we skip messages */
CTL_CONSOLE_MIN_DELAY_CS, /* Init delay at which we skip messages */
CTL_CONSOLE_BACKOFF, /* Delay increase factor */
CTL_STACK_SIZE, /* Max observed stack size */
#ifdef DEBUG_KMEM
CTL_KMEM_KMEMUSED, /* Crrently alloc'd kmem bytes */
CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */
CTL_KMEM_VMEMUSED, /* Currently alloc'd vmem bytes */
CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */
#endif
CTL_HOSTID, /* Host id reported by /usr/bin/hostid */
CTL_HW_SERIAL, /* Hardware serial number from hostid */
CTL_MUTEX_STATS, /* Global mutex statistics */
CTL_MUTEX_STATS_PER, /* Per mutex statistics */
CTL_MUTEX_SPIN_MAX, /* Maximum mutex spin iterations */
};
static int
@@ -368,21 +381,107 @@ proc_dohostid(struct ctl_table *table, int write, struct file *filp,
RETURN(rc);
}
static struct ctl_table spl_table[] = {
/* NB No .strategy entries have been provided since
* sysctl(8) prefers to go via /proc for portability.
*/
{
.ctl_name = CTL_VERSION,
.procname = "version",
.data = spl_version,
.maxlen = sizeof(spl_version),
.mode = 0444,
.proc_handler = &proc_dostring,
},
#ifdef DEBUG_MUTEX
/* Emit the column header line for the per-mutex statistics listing:
 * name, type, owner, then the e_* (enter) and te_* (tryenter) counter
 * columns. */
static void
mutex_seq_show_headers(struct seq_file *f)
{
seq_printf(f, "%-36s %-4s %-16s\t"
"e_tot\te_nh\te_sp\te_sl\tte_tot\tte_nh\n",
"name", "type", "owner");
}
/*
 * seq_file ->show callback: print one line for the mutex 'p' holding
 * its name, a one letter type code, the owner pointer (or a
 * "<not held>" marker) and each of its statistics counters.
 * Always returns 0.
 */
static int
mutex_seq_show(struct seq_file *f, void *p)
{
	kmutex_t *mp = p;
	char type_char = 'X';
	int idx;

	ASSERT(mp->km_magic == KM_MAGIC);

	/* Translate the mutex type into its single letter code */
	if (mp->km_type == MUTEX_DEFAULT)
		type_char = 'D';
	else if (mp->km_type == MUTEX_SPIN)
		type_char = 'S';
	else if (mp->km_type == MUTEX_ADAPTIVE)
		type_char = 'A';
	else
		SBUG();

	seq_printf(f, "%-36s %c ", mp->km_name, type_char);

	if (!mp->km_owner)
		seq_printf(f, "%-16s\t", "<not held>");
	else
		seq_printf(f, "%p\t", mp->km_owner);

	/* Counters are tab separated, the last one ends the line */
	for (idx = 0; idx < MUTEX_STATS_SIZE; idx++) {
		char sep = (idx == MUTEX_STATS_SIZE - 1) ? '\n' : '\t';

		seq_printf(f, "%d%c", mp->km_stats[idx], sep);
	}

	return 0;
}
/*
 * seq_file ->start callback: take mutex_stats_lock and return the
 * mutex at index *pos of mutex_stats_list, or NULL when the list has
 * no such entry (including when the list is empty).  The header line
 * is emitted when iteration begins at position zero.
 *
 * The lock is intentionally left held on every return path; it is
 * dropped by mutex_seq_stop().
 */
static void *
mutex_seq_start(struct seq_file *f, loff_t *pos)
{
	struct list_head *p;
	loff_t n = *pos;
	ENTRY;

	mutex_lock(&mutex_stats_lock);
	if (!n)
		mutex_seq_show_headers(f);

	/* Stop at the list head before converting to an entry: the head
	 * is not embedded in a kmutex_t, so it must never be handed to
	 * list_entry(). */
	for (p = mutex_stats_list.next; p != &mutex_stats_list; p = p->next) {
		if (n-- == 0)
			RETURN(list_entry(p, kmutex_t, km_list));
	}

	RETURN(NULL);
}
/*
 * seq_file ->next callback: advance *pos and return the mutex that
 * follows 'p' on mutex_stats_list, or NULL once the end of the list
 * has been reached.
 */
static void *
mutex_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
	kmutex_t *mp = p;
	struct list_head *following;
	ENTRY;

	++*pos;

	following = mp->km_list.next;
	if (following == &mutex_stats_list)
		RETURN(NULL);

	RETURN(list_entry(following, kmutex_t, km_list));
}
/* seq_file ->stop callback: drop mutex_stats_lock, which
 * mutex_seq_start() acquired. */
static void
mutex_seq_stop(struct seq_file *f, void *v)
{
mutex_unlock(&mutex_stats_lock);
}
/* seq_file iterator over mutex_stats_list, one record per mutex */
static struct seq_operations mutex_seq_ops = {
.show = mutex_seq_show,
.start = mutex_seq_start,
.next = mutex_seq_next,
.stop = mutex_seq_stop,
};
/* ->open handler: attach the mutex_seq_ops iterator to the file and
 * return seq_open()'s result. */
static int
proc_mutex_open(struct inode *inode, struct file *filp)
{
return seq_open(filp, &mutex_seq_ops);
}
/* File operations for the "mutex_stats" proc entry; everything except
 * open is delegated to the generic seq_file helpers. */
static struct file_operations proc_mutex_operations = {
.open = proc_mutex_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif /* DEBUG_MUTEX */
static struct ctl_table spl_debug_table[] = {
{
.ctl_name = CTL_DEBUG_SUBSYS,
.procname = "debug_subsystem",
.procname = "subsystem",
.data = &spl_debug_subsys,
.maxlen = sizeof(unsigned long),
.mode = 0644,
@@ -390,7 +489,7 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_MASK,
.procname = "debug_mask",
.procname = "mask",
.data = &spl_debug_mask,
.maxlen = sizeof(unsigned long),
.mode = 0644,
@@ -398,7 +497,7 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_PRINTK,
.procname = "debug_printk",
.procname = "printk",
.data = &spl_debug_printk,
.maxlen = sizeof(unsigned long),
.mode = 0644,
@@ -406,13 +505,13 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_MB,
.procname = "debug_mb",
.procname = "mb",
.mode = 0644,
.proc_handler = &proc_debug_mb,
},
{
.ctl_name = CTL_DEBUG_BINARY,
.procname = "debug_binary",
.procname = "binary",
.data = &spl_debug_binary,
.maxlen = sizeof(int),
.mode = 0644,
@@ -436,7 +535,7 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_PATH,
.procname = "debug_path",
.procname = "path",
.data = spl_debug_file_path,
.maxlen = sizeof(spl_debug_file_path),
.mode = 0644,
@@ -444,7 +543,7 @@ static struct ctl_table spl_table[] = {
},
{
.ctl_name = CTL_DEBUG_DUMP,
.procname = "debug_dump",
.procname = "dump",
.mode = 0200,
.proc_handler = &proc_dump_kernel,
},
@@ -483,14 +582,40 @@ static struct ctl_table spl_table[] = {
.proc_handler = &proc_console_backoff,
},
{
.ctl_name = CTL_STACK_SIZE,
.ctl_name = CTL_DEBUG_STACK_SIZE,
.procname = "stack_max",
.data = &spl_debug_stack,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = &proc_dointvec,
},
{0},
};
#ifdef DEBUG_MUTEX
static struct ctl_table spl_mutex_table[] = {
{
.ctl_name = CTL_MUTEX_STATS,
.procname = "stats",
.data = &mutex_stats,
.maxlen = sizeof(int) * MUTEX_STATS_SIZE,
.mode = 0444,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_MUTEX_SPIN_MAX,
.procname = "spin_max",
.data = &mutex_spin_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{0},
};
#endif /* DEBUG_MUTEX */
#ifdef DEBUG_KMEM
static struct ctl_table spl_kmem_table[] = {
{
.ctl_name = CTL_KMEM_KMEMUSED,
.procname = "kmem_used",
@@ -527,7 +652,22 @@ static struct ctl_table spl_table[] = {
.mode = 0444,
.proc_handler = &proc_doulongvec_minmax,
},
#endif
{0},
};
#endif /* DEBUG_KMEM */
static struct ctl_table spl_table[] = {
/* NB No .strategy entries have been provided since
* sysctl(8) prefers to go via /proc for portability.
*/
{
.ctl_name = CTL_VERSION,
.procname = "version",
.data = spl_version,
.maxlen = sizeof(spl_version),
.mode = 0444,
.proc_handler = &proc_dostring,
},
{
.ctl_name = CTL_HOSTID,
.procname = "hostid",
@@ -544,10 +684,32 @@ static struct ctl_table spl_table[] = {
.mode = 0444,
.proc_handler = &proc_dostring,
},
{
.ctl_name = CTL_SPL_DEBUG,
.procname = "debug",
.mode = 0555,
.child = spl_debug_table,
},
#ifdef DEBUG_MUTEX
{
.ctl_name = CTL_SPL_MUTEX,
.procname = "mutex",
.mode = 0555,
.child = spl_mutex_table,
},
#endif
#ifdef DEBUG_KMEM
{
.ctl_name = CTL_SPL_KMEM,
.procname = "kmem",
.mode = 0555,
.child = spl_kmem_table,
},
#endif
{ 0 },
};
static struct ctl_table spl_dir_table[] = {
static struct ctl_table spl_dir[] = {
{
.ctl_name = CTL_SPL,
.procname = "spl",
@@ -563,9 +725,22 @@ proc_init(void)
ENTRY;
#ifdef CONFIG_SYSCTL
spl_header = register_sysctl_table(spl_dir_table, 0);
spl_header = register_sysctl_table(spl_dir, 0);
if (spl_header == NULL)
RETURN(-EUNATCH);
#ifdef DEBUG_MUTEX
{
struct proc_dir_entry *entry = create_proc_entry("mutex_stats",
0444, NULL);
if (entry) {
entry->proc_fops = &proc_mutex_operations;
} else {
unregister_sysctl_table(spl_header);
RETURN(-EUNATCH);
}
}
#endif /* DEBUG_MUTEX */
#endif
RETURN(0);
}
@@ -577,6 +752,7 @@ proc_fini(void)
#ifdef CONFIG_SYSCTL
ASSERT(spl_header != NULL);
remove_proc_entry("mutex_stats", NULL);
unregister_sysctl_table(spl_header);
#endif
EXIT;
+3 -3
View File
@@ -106,7 +106,7 @@ task_done(taskq_t *tq, task_t *t)
t->t_id = 0;
t->t_func = NULL;
t->t_arg = NULL;
list_add(&t->t_list, &tq->tq_free_list);
list_add_tail(&t->t_list, &tq->tq_free_list);
} else {
task_free(tq, t);
}
@@ -209,7 +209,7 @@ __taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
spin_lock(&t->t_lock);
list_add(&t->t_list, &tq->tq_pend_list);
list_add_tail(&t->t_list, &tq->tq_pend_list);
t->t_id = rc = tq->tq_next_id;
tq->tq_next_id++;
t->t_func = func;
@@ -282,7 +282,7 @@ taskq_thread(void *args)
if (!list_empty(&tq->tq_pend_list)) {
t = list_entry(tq->tq_pend_list.next, task_t, t_list);
list_del_init(&t->t_list);
list_add(&t->t_list, &tq->tq_work_list);
list_add_tail(&t->t_list, &tq->tq_work_list);
tq->tq_nactive++;
spin_unlock_irq(&tq->tq_lock);