mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2024-12-28 20:09:35 +03:00
57d862349b
working on this branch for the next few days I suggested you work off of the 0.3.1 tag. The following changes are fairly extensive and are designed to make the SPL compatible with all kernels in the range of 2.6.18-2.6.25. There were 13 relevant API changes between these releases and I have added the needed autoconf tests to check for them. However, this has not all been tested extensively. I'll sort out the breakage on Fedora Core 9 and RHEL5 this week. SPL_AC_TYPE_UINTPTR_T SPL_AC_TYPE_KMEM_CACHE_T SPL_AC_KMEM_CACHE_DESTROY_INT SPL_AC_ATOMIC_PANIC_NOTIFIER SPL_AC_3ARGS_INIT_WORK SPL_AC_2ARGS_REGISTER_SYSCTL SPL_AC_KMEM_CACHE_T SPL_AC_KMEM_CACHE_CREATE_DTOR SPL_AC_3ARG_KMEM_CACHE_CREATE_CTOR SPL_AC_SET_SHRINKER SPL_AC_PATH_IN_NAMEIDATA SPL_AC_TASK_CURR SPL_AC_CTL_UNNUMBERED git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@119 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
308 lines
8.0 KiB
C
308 lines
8.0 KiB
C
/*
|
|
* This file is part of the SPL: Solaris Porting Layer.
|
|
*
|
|
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
|
|
* Produced at Lawrence Livermore National Laboratory
|
|
* Written by:
|
|
* Brian Behlendorf <behlendorf1@llnl.gov>,
|
|
* Herb Wartens <wartens2@llnl.gov>,
|
|
* Jim Garlick <garlick@llnl.gov>
|
|
* UCRL-CODE-235197
|
|
*
|
|
* This is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*/
|
|
|
|
#include <sys/mutex.h>
|
|
|
|
/* Select S_MUTEX as this file's DEBUG_SUBSYSTEM, discarding any earlier
 * definition.  #undef of a name that is not defined is a no-op, so no
 * #ifdef guard is required.
 */
#undef DEBUG_SUBSYSTEM
#define DEBUG_SUBSYSTEM S_MUTEX
|
|
|
|
/* Mutex implementation based on those found in Solaris. This means
|
|
* the MUTEX_DEFAULT type is an adaptive mutex. When calling
|
|
* mutex_enter() your process will spin waiting for the lock if it's
|
|
* likely the lock will be freed shortly. If it looks like the
|
|
* lock will be held for a longer time we schedule and sleep waiting
|
|
* for it. This determination is made by checking if the holder of
|
|
* the lock is currently running on cpu or sleeping waiting to be
|
|
* scheduled. If the holder is currently running it's likely the
|
|
* lock will be shortly dropped.
|
|
*
|
|
* XXX: This is basically a rough implementation to see if this
|
|
* helps our performance. If it does a more careful implementation
|
|
* should be done, perhaps in assembly.
|
|
*/
|
|
|
|
/* 0: Never spin when trying to acquire lock
|
|
* -1: Spin until acquired or holder yields without dropping lock
|
|
* 1-MAX_INT: Spin for N attempts before sleeping for lock
|
|
*/
|
|
/* Spin budget consulted by the adaptive mutex path; also registered as a
 * module parameter at the bottom of this file.
 */
int mutex_spin_max = 0;

#if defined(DEBUG_MUTEX)
/* Global statistics counters, plus the list of initialized mutexes and the
 * spinlock protecting that list.  Mutexes are linked in by
 * __spl_mutex_init() and unlinked by __spl_mutex_destroy().
 */
int mutex_stats[MUTEX_STATS_SIZE] = { 0 };
struct list_head mutex_stats_list;
spinlock_t mutex_stats_lock;
#endif
|
|
|
|
void
|
|
__spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc)
|
|
{
|
|
int flags = KM_SLEEP;
|
|
|
|
ASSERT(mp);
|
|
ASSERT(name);
|
|
ASSERT(ibc == NULL);
|
|
ASSERT(mp->km_magic != KM_MAGIC); /* Never double init */
|
|
|
|
mp->km_magic = KM_MAGIC;
|
|
mp->km_owner = NULL;
|
|
mp->km_name = NULL;
|
|
mp->km_name_size = strlen(name) + 1;
|
|
|
|
switch (type) {
|
|
case MUTEX_DEFAULT:
|
|
mp->km_type = MUTEX_ADAPTIVE;
|
|
break;
|
|
case MUTEX_SPIN:
|
|
case MUTEX_ADAPTIVE:
|
|
mp->km_type = type;
|
|
break;
|
|
default:
|
|
SBUG();
|
|
}
|
|
|
|
/* We may be called when there is a non-zero preempt_count or
|
|
* interrupts are disabled is which case we must not sleep.
|
|
*/
|
|
if (current_thread_info()->preempt_count || irqs_disabled())
|
|
flags = KM_NOSLEEP;
|
|
|
|
/* Semaphore kmem_alloc'ed to keep struct size down (<64b) */
|
|
mp->km_sem = kmem_alloc(sizeof(struct semaphore), flags);
|
|
if (mp->km_sem == NULL)
|
|
return;
|
|
|
|
mp->km_name = kmem_alloc(mp->km_name_size, flags);
|
|
if (mp->km_name == NULL) {
|
|
kmem_free(mp->km_sem, sizeof(struct semaphore));
|
|
return;
|
|
}
|
|
|
|
sema_init(mp->km_sem, 1);
|
|
strncpy(mp->km_name, name, mp->km_name_size);
|
|
|
|
#ifdef DEBUG_MUTEX
|
|
mp->km_stats = kmem_zalloc(sizeof(int) * MUTEX_STATS_SIZE, flags);
|
|
if (mp->km_stats == NULL) {
|
|
kmem_free(mp->km_name, mp->km_name_size);
|
|
kmem_free(mp->km_sem, sizeof(struct semaphore));
|
|
return;
|
|
}
|
|
|
|
/* XXX - This appears to be a much more contended lock than I
|
|
* would have expected. To run with this debugging enabled and
|
|
* get reasonable performance we may need to be more clever and
|
|
* do something like hash the mutex ptr on to one of several
|
|
* lists to ease this single point of contention.
|
|
*/
|
|
spin_lock(&mutex_stats_lock);
|
|
list_add_tail(&mp->km_list, &mutex_stats_list);
|
|
spin_unlock(&mutex_stats_lock);
|
|
#endif
|
|
}
|
|
EXPORT_SYMBOL(__spl_mutex_init);
|
|
|
|
void
|
|
__spl_mutex_destroy(kmutex_t *mp)
|
|
{
|
|
ASSERT(mp);
|
|
ASSERT(mp->km_magic == KM_MAGIC);
|
|
|
|
#ifdef DEBUG_MUTEX
|
|
spin_lock(&mutex_stats_lock);
|
|
list_del_init(&mp->km_list);
|
|
spin_unlock(&mutex_stats_lock);
|
|
|
|
kmem_free(mp->km_stats, sizeof(int) * MUTEX_STATS_SIZE);
|
|
#endif
|
|
kmem_free(mp->km_name, mp->km_name_size);
|
|
kmem_free(mp->km_sem, sizeof(struct semaphore));
|
|
|
|
memset(mp, KM_POISON, sizeof(*mp));
|
|
}
|
|
EXPORT_SYMBOL(__spl_mutex_destroy);
|
|
|
|
/* Return 1 if we acquired the mutex, else zero. */
|
|
int
|
|
__mutex_tryenter(kmutex_t *mp)
|
|
{
|
|
int rc;
|
|
ENTRY;
|
|
|
|
ASSERT(mp);
|
|
ASSERT(mp->km_magic == KM_MAGIC);
|
|
MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_TOTAL);
|
|
MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_TOTAL);
|
|
|
|
rc = down_trylock(mp->km_sem);
|
|
if (rc == 0) {
|
|
ASSERT(mp->km_owner == NULL);
|
|
mp->km_owner = current;
|
|
MUTEX_STAT_INC(mutex_stats, MUTEX_TRYENTER_NOT_HELD);
|
|
MUTEX_STAT_INC(mp->km_stats, MUTEX_TRYENTER_NOT_HELD);
|
|
}
|
|
|
|
RETURN(!rc);
|
|
}
|
|
EXPORT_SYMBOL(__mutex_tryenter);
|
|
|
|
/* Fallback when HAVE_TASK_CURR is not defined: report every task as "not
 * currently running", which makes the adaptive spin loop in
 * mutex_enter_adaptive() fall straight through to sleeping.
 */
#if !defined(HAVE_TASK_CURR)
#define task_curr(owner) 0
#endif
|
|
|
|
|
|
static void
|
|
mutex_enter_adaptive(kmutex_t *mp)
|
|
{
|
|
struct task_struct *owner;
|
|
int count = 0;
|
|
|
|
/* Lock is not held so we expect to aquire the lock */
|
|
if ((owner = mp->km_owner) == NULL) {
|
|
down(mp->km_sem);
|
|
MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_NOT_HELD);
|
|
MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_NOT_HELD);
|
|
} else {
|
|
/* The lock is held by a currently running task which
|
|
* we expect will drop the lock before leaving the
|
|
* head of the runqueue. So the ideal thing to do
|
|
* is spin until we aquire the lock and avoid a
|
|
* context switch. However it is also possible the
|
|
* task holding the lock yields the processor with
|
|
* out dropping lock. In which case, we know it's
|
|
* going to be a while so we stop spinning and go
|
|
* to sleep waiting for the lock to be available.
|
|
* This should strike the optimum balance between
|
|
* spinning and sleeping waiting for a lock.
|
|
*/
|
|
while (task_curr(owner) && (count <= mutex_spin_max)) {
|
|
if (down_trylock(mp->km_sem) == 0) {
|
|
MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
|
|
MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
|
|
GOTO(out, count);
|
|
}
|
|
count++;
|
|
}
|
|
|
|
/* The lock is held by a sleeping task so it's going to
|
|
* cost us minimally one context switch. We might as
|
|
* well sleep and yield the processor to other tasks.
|
|
*/
|
|
down(mp->km_sem);
|
|
MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SLEEP);
|
|
MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SLEEP);
|
|
}
|
|
out:
|
|
MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_TOTAL);
|
|
MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_TOTAL);
|
|
}
|
|
|
|
void
|
|
__mutex_enter(kmutex_t *mp)
|
|
{
|
|
ENTRY;
|
|
ASSERT(mp);
|
|
ASSERT(mp->km_magic == KM_MAGIC);
|
|
|
|
switch (mp->km_type) {
|
|
case MUTEX_SPIN:
|
|
while (down_trylock(mp->km_sem));
|
|
MUTEX_STAT_INC(mutex_stats, MUTEX_ENTER_SPIN);
|
|
MUTEX_STAT_INC(mp->km_stats, MUTEX_ENTER_SPIN);
|
|
break;
|
|
case MUTEX_ADAPTIVE:
|
|
mutex_enter_adaptive(mp);
|
|
break;
|
|
}
|
|
|
|
ASSERT(mp->km_owner == NULL);
|
|
mp->km_owner = current;
|
|
|
|
EXIT;
|
|
}
|
|
EXPORT_SYMBOL(__mutex_enter);
|
|
|
|
void
|
|
__mutex_exit(kmutex_t *mp)
|
|
{
|
|
ENTRY;
|
|
ASSERT(mp);
|
|
ASSERT(mp->km_magic == KM_MAGIC);
|
|
ASSERT(mp->km_owner == current);
|
|
mp->km_owner = NULL;
|
|
up(mp->km_sem);
|
|
EXIT;
|
|
}
|
|
EXPORT_SYMBOL(__mutex_exit);
|
|
|
|
/* Return 1 if mutex is held by current process, else zero. */
|
|
int
|
|
__mutex_owned(kmutex_t *mp)
|
|
{
|
|
ENTRY;
|
|
ASSERT(mp);
|
|
ASSERT(mp->km_magic == KM_MAGIC);
|
|
RETURN(mp->km_owner == current);
|
|
}
|
|
EXPORT_SYMBOL(__mutex_owned);
|
|
|
|
/* Return owner if mutex is owned, else NULL. */
|
|
kthread_t *
|
|
__spl_mutex_owner(kmutex_t *mp)
|
|
{
|
|
ENTRY;
|
|
ASSERT(mp);
|
|
ASSERT(mp->km_magic == KM_MAGIC);
|
|
RETURN(mp->km_owner);
|
|
}
|
|
EXPORT_SYMBOL(__spl_mutex_owner);
|
|
|
|
int
|
|
spl_mutex_init(void)
|
|
{
|
|
ENTRY;
|
|
#ifdef DEBUG_MUTEX
|
|
spin_lock_init(&mutex_stats_lock);
|
|
INIT_LIST_HEAD(&mutex_stats_list);
|
|
#endif
|
|
RETURN(0);
|
|
}
|
|
|
|
void
|
|
spl_mutex_fini(void)
|
|
{
|
|
ENTRY;
|
|
#ifdef DEBUG_MUTEX
|
|
ASSERT(list_empty(&mutex_stats_list));
|
|
#endif
|
|
EXIT;
|
|
}
|
|
|
|
module_param(mutex_spin_max, int, 0644);
|
|
MODULE_PARM_DESC(mutex_spin_max, "Spin a maximum of N times to aquire lock");
|