mirror of https://git.proxmox.com/git/mirror_zfs.git
synced 2025-10-26 18:05:04 +03:00
[...] working on this branch for the next few days I suggested you work off of
the 0.3.1 tag.

The following changes are fairly extensive and are designed to make the SPL
compatible with all kernels in the range of 2.6.18-2.6.25. There were 13
relevant API changes between these releases and I have added the needed
autoconf tests to check for them. However, this has not all been tested
extensively. I'll sort out the breakage on Fedora Core 9 and RHEL5 this week.

  SPL_AC_TYPE_UINTPTR_T
  SPL_AC_TYPE_KMEM_CACHE_T
  SPL_AC_KMEM_CACHE_DESTROY_INT
  SPL_AC_ATOMIC_PANIC_NOTIFIER
  SPL_AC_3ARGS_INIT_WORK
  SPL_AC_2ARGS_REGISTER_SYSCTL
  SPL_AC_KMEM_CACHE_T
  SPL_AC_KMEM_CACHE_CREATE_DTOR
  SPL_AC_3ARG_KMEM_CACHE_CREATE_CTOR
  SPL_AC_SET_SHRINKER
  SPL_AC_PATH_IN_NAMEIDATA
  SPL_AC_TASK_CURR
  SPL_AC_CTL_UNNUMBERED

git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@119 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
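For context, one example of the kind of API drift these checks cover (an
illustrative sketch, not code from this commit): INIT_WORK() dropped its third
argument in 2.6.20, within this kernel range, so a compat wrapper keyed off a
result like SPL_AC_3ARGS_INIT_WORK might look as follows. The
HAVE_3ARGS_INIT_WORK symbol and the spl_init_work() name are hypothetical.

	#include <linux/workqueue.h>

	/* Kernels before 2.6.20 take the callback's argument explicitly;
	 * newer kernels pass the work_struct itself to the callback. */
	#ifdef HAVE_3ARGS_INIT_WORK
	#define spl_init_work(work, func, data) \
		INIT_WORK((work), (void (*)(void *))(func), (void *)(data))
	#else
	#define spl_init_work(work, func, data) \
		INIT_WORK((work), (void (*)(struct work_struct *))(func))
	#endif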
		
			
				
	
	
		
/*
 *  This file is part of the SPL: Solaris Porting Layer.
 *
 *  Copyright (c) 2008 Lawrence Livermore National Security, LLC.
 *  Produced at Lawrence Livermore National Laboratory
 *  Written by:
 *          Brian Behlendorf <behlendorf1@llnl.gov>,
 *          Herb Wartens <wartens2@llnl.gov>,
 *          Jim Garlick <garlick@llnl.gov>
 *  UCRL-CODE-235197
 *
 *  This is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 */

#include <sys/mutex.h>

#ifdef DEBUG_SUBSYSTEM
#undef DEBUG_SUBSYSTEM
#endif

#define DEBUG_SUBSYSTEM S_MUTEX

/* Mutex implementation based on those found in Solaris.  This means
 * the MUTEX_DEFAULT type is an adaptive mutex.  When calling
 * mutex_enter() your process will spin waiting for the lock if it's
 * likely the lock will be freed shortly.  If it looks like the
 * lock will be held for a longer time we schedule and sleep waiting
 * for it.  This determination is made by checking if the holder of
 * the lock is currently running on cpu or sleeping waiting to be
 * scheduled.  If the holder is currently running it's likely the
 * lock will be shortly dropped.
 *
 * XXX: This is basically a rough implementation to see if this
 * helps our performance.  If it does a more careful implementation
 * should be done, perhaps in assembly.
 */

/*  0:         Never spin when trying to acquire lock
 * -1:         Spin until acquired or holder yields without dropping lock
 *  1-MAX_INT: Spin for N attempts before sleeping for lock
 */
int mutex_spin_max = 0;

#ifdef DEBUG_MUTEX
int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
spinlock_t mutex_stats_lock;
struct list_head mutex_stats_list;
#endif
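
/* For orientation, a consumer-side sketch (assumes the usual
 * Solaris-style wrappers mutex_init()/mutex_enter()/mutex_exit()/
 * mutex_destroy() map onto the __spl_mutex_init()/__mutex_enter()/
 * __mutex_exit()/__spl_mutex_destroy() primitives below; the wrapper
 * macros themselves are not defined in this file):
 *
 *	kmutex_t lock;
 *
 *	mutex_init(&lock, "example", MUTEX_DEFAULT, NULL);
 *	mutex_enter(&lock);	(spins or sleeps adaptively)
 *	...critical section, owner recorded in lock.km_owner...
 *	mutex_exit(&lock);
 *	mutex_destroy(&lock);
 */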

void
__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
{
	int flags = KM_SLEEP;

	ASSERT(mp);
	ASSERT(name);
	ASSERT(ibc == NULL);
	ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */

	mp->km_magic = KM_MAGIC;
	mp->km_owner = NULL;
	mp->km_name = NULL;
	mp->km_name_size = strlen(name) + 1;

	switch (type) {
		case MUTEX_DEFAULT:
			mp->km_type = MUTEX_ADAPTIVE;
			break;
		case MUTEX_SPIN:
		case MUTEX_ADAPTIVE:
			mp->km_type = type;
			break;
		default:
			SBUG();
	}

	/* We may be called when there is a non-zero preempt_count or
	 * interrupts are disabled in which case we must not sleep.
	 */
	if (current_thread_info()->preempt_count || irqs_disabled())
		flags = KM_NOSLEEP;

	/* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
	mp->km_sem = kmem_alloc(sizeof(struct semaphore), flags);
	if (mp->km_sem == NULL)
		return;

	mp->km_name = kmem_alloc(mp->km_name_size, flags);
	if (mp->km_name == NULL) {
		kmem_free(mp->km_sem, sizeof(struct semaphore));
		return;
	}

	sema_init(mp->km_sem, 1);
	strncpy(mp->km_name, name, mp->km_name_size);

#ifdef DEBUG_MUTEX
	mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, flags);
	if (mp->km_stats == NULL) {
		kmem_free(mp->km_name, mp->km_name_size);
		kmem_free(mp->km_sem, sizeof(struct semaphore));
		return;
	}

	/* XXX - This appears to be a much more contended lock than I
	 * would have expected.  To run with this debugging enabled and
	 * get reasonable performance we may need to be more clever and
	 * do something like hash the mutex ptr on to one of several
	 * lists to ease this single point of contention.
	 */
	spin_lock(&mutex_stats_lock);
	list_add_tail(&mp->km_list, &mutex_stats_list);
	spin_unlock(&mutex_stats_lock);
#endif
}
EXPORT_SYMBOL(__spl_mutex_init);
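
/* A hypothetical sketch of the XXX contention note in __spl_mutex_init()
 * above: hash the mutex pointer onto one of several bucketed lists, each
 * guarded by its own spinlock, so inits and destroys only contend within
 * a bucket.  The names here are illustrative only:
 *
 *	#include <linux/hash.h>
 *
 *	#define MUTEX_HASH_BITS		4
 *	#define MUTEX_HASH_SIZE		(1 << MUTEX_HASH_BITS)
 *	static spinlock_t mutex_stats_locks[MUTEX_HASH_SIZE];
 *	static struct list_head mutex_stats_lists[MUTEX_HASH_SIZE];
 *
 *	static inline int mutex_stats_hash(kmutex_t *mp)
 *	{
 *		return hash_ptr(mp, MUTEX_HASH_BITS);
 *	}
 */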

void
__spl_mutex_destroy(kmutex_t *mp)
{
	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);

#ifdef DEBUG_MUTEX
	spin_lock(&mutex_stats_lock);
	list_del_init(&mp->km_list);
	spin_unlock(&mutex_stats_lock);

	kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
#endif
	kmem_free(mp->km_name, mp->km_name_size);
	kmem_free(mp->km_sem, sizeof(struct semaphore));

	memset(mp, KM_POISON, sizeof(*mp));
}
EXPORT_SYMBOL(__spl_mutex_destroy);

/* Return 1 if we acquired the mutex, else zero.  */
int
__mutex_tryenter(kmutex_t *mp)
{
	int rc;
	ENTRY;

	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);
	MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL);
	MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL);

	rc = down_trylock(mp->km_sem);
	if (rc == 0) {
		ASSERT(mp->km_owner == NULL);
		mp->km_owner = current;
		MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD);
		MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD);
	}

	RETURN(!rc);
}
EXPORT_SYMBOL(__mutex_tryenter);
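
/* Note the inversion: down_trylock() returns 0 on success, so the
 * RETURN(!rc) above yields the Solaris convention of 1 == acquired.
 * A consumer-side sketch (illustrative):
 *
 *	if (mutex_tryenter(&lock)) {
 *		...got the lock without blocking...
 *		mutex_exit(&lock);
 *	}
 */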

#ifndef HAVE_TASK_CURR
#define task_curr(owner)		0
#endif
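
/* Note: without task_curr() there is no way to tell whether the lock
 * holder is on cpu, so with the fallback above the spin loop in
 * mutex_enter_adaptive() never executes and adaptive mutexes always
 * take the sleep path. */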

static void
mutex_enter_adaptive(kmutex_t *mp)
{
	struct task_struct *owner;
	int count = 0;

	/* Lock is not held so we expect to acquire the lock */
	if ((owner = mp->km_owner) == NULL) {
		down(mp->km_sem);
		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD);
		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD);
	} else {
		/* The lock is held by a currently running task which
		 * we expect will drop the lock before leaving the
		 * head of the runqueue.  So the ideal thing to do
		 * is spin until we acquire the lock and avoid a
		 * context switch.  However it is also possible the
		 * task holding the lock yields the processor without
		 * dropping the lock.  In which case, we know it's
		 * going to be a while so we stop spinning and go
		 * to sleep waiting for the lock to be available.
		 * This should strike the optimum balance between
		 * spinning and sleeping waiting for a lock.
		 */
		while (task_curr(owner) && (count <= mutex_spin_max)) {
			if (down_trylock(mp->km_sem) == 0) {
				MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
				MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
				GOTO(out, count);
			}
			count++;
		}

		/* The lock is held by a sleeping task so it's going to
		 * cost us minimally one context switch.  We might as
		 * well sleep and yield the processor to other tasks.
		 */
		down(mp->km_sem);
		MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP);
		MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP);
	}
out:
	MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL);
	MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL);
}

void
__mutex_enter(kmutex_t *mp)
{
	ENTRY;
	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);

	switch (mp->km_type) {
		case MUTEX_SPIN:
			while (down_trylock(mp->km_sem));
			MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
			MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
			break;
		case MUTEX_ADAPTIVE:
			mutex_enter_adaptive(mp);
			break;
	}

	ASSERT(mp->km_owner == NULL);
	mp->km_owner = current;

	EXIT;
}
EXPORT_SYMBOL(__mutex_enter);

void
__mutex_exit(kmutex_t *mp)
{
	ENTRY;
	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);
	ASSERT(mp->km_owner == current);
	mp->km_owner = NULL;
	up(mp->km_sem);
	EXIT;
}
EXPORT_SYMBOL(__mutex_exit);

/* Return 1 if mutex is held by current process, else zero.  */
int
__mutex_owned(kmutex_t *mp)
{
	ENTRY;
	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);
	RETURN(mp->km_owner == current);
}
EXPORT_SYMBOL(__mutex_owned);

/* Return owner if mutex is owned, else NULL.  */
kthread_t *
__spl_mutex_owner(kmutex_t *mp)
{
	ENTRY;
	ASSERT(mp);
	ASSERT(mp->km_magic == KM_MAGIC);
	RETURN(mp->km_owner);
}
EXPORT_SYMBOL(__spl_mutex_owner);

int
spl_mutex_init(void)
{
	ENTRY;
#ifdef DEBUG_MUTEX
	spin_lock_init(&mutex_stats_lock);
	INIT_LIST_HEAD(&mutex_stats_list);
#endif
	RETURN(0);
}

void
spl_mutex_fini(void)
{
	ENTRY;
#ifdef DEBUG_MUTEX
	ASSERT(list_empty(&mutex_stats_list));
#endif
	EXIT;
}

module_param(mutex_spin_max, int, 0644);
MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to acquire lock");
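
/* The spin cap can be changed without a rebuild: at load time
 * ("modprobe spl mutex_spin_max=100") or, since the parameter is
 * registered with mode 0644, at runtime through sysfs (path is
 * illustrative of the usual layout):
 *
 *	echo 100 > /sys/module/spl/parameters/mutex_spin_max
 */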