Reimplement mutexes for Linux lock profiling/analysis

For a generic explanation of why mutexes needed to be reimplemented
to work with the kernel lock profiling see commits:
  e811949a57 and
  d28db80fd0

The specific changes made to the mutex implementation are as follows.
The Linux mutex structure is now directly embedded in the kmutex_t.
This allows a kmutex_t to be directly cast to a mutex struct and
passed directly to the Linux primitive.

Just like with the rwlocks it is critical that these functions be
implemented as #defines to ensure the location information is
preserved.  The preprocessor can then do a direct replacement of
the Solaris primitive with the Linux primitive.

Just as with the rwlocks we need to track the lock owner.  Here
things get a little more interesting because depending on your
kernel version, and how you've built your kernel, Linux may already
do this for you.  If you're running a 2.6.29 or newer kernel on an
SMP system the lock owner will be tracked.  This was added to Linux
to support adaptive mutexes, more on that shortly.  Alternately, your
kernel might track the lock owner if you've set CONFIG_DEBUG_MUTEXES
in the kernel build.  If neither of the above things is true for
your kernel the kmutex_t type will include and track the lock owner
to ensure correct behavior.  This is all handled by a new autoconf
check called SPL_AC_MUTEX_OWNER.

Concerning adaptive mutexes, these are a very recent development and
they did not make it into either the latest FC11 or SLES11 kernels.
Ideally, I'd love to see this kernel change appear in one of these
distros because it does help performance.  From Linux kernel commit:
  0d66bf6d3514b35eb6897629059443132992dbd7
  "Testing with Ingo's test-mutex application...
  gave a 345% boost for VFS scalability on my testbox"
However, if you don't want to backport this change yourself you
can still simply export the task_curr() symbol.  The kmutex_t
implementation will use this symbol when it's available to
provide its own adaptive mutexes.

Finally, DEBUG_MUTEX support was removed including the proc handlers.
This was done because now that we are cleanly integrated with the
kernel profiling all this information and much much more is available
in debug kernel builds.  This code was now redundant.

Updated mutexes validated on:
    - SLES10   (ppc64)
    - SLES11   (x86_64)
    - CHAOS4.2 (x86_64)
    - RHEL5.3  (x86_64)
    - RHEL6    (x86_64)
    - FC11     (x86_64)
This commit is contained in:
Brian Behlendorf
2009-09-25 14:47:01 -07:00
parent d28db80fd0
commit 4d54fdee1d
8 changed files with 596 additions and 835 deletions
+1
View File
@@ -33,6 +33,7 @@ extern "C" {
#include <linux/module.h>
#include <linux/wait.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
/* The kcondvar_t struct is protected by mutex taken externally before
+161 -72
View File
@@ -1,7 +1,7 @@
/*
* This file is part of the SPL: Solaris Porting Layer.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Copyright (c) 2009 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
@@ -25,88 +25,177 @@
*/
#ifndef _SPL_MUTEX_H
#define _SPL_MUTEX_H
#define _SPL_MUTEX_H
#ifdef __cplusplus
extern "C" {
#endif
#include <linux/module.h>
#include <linux/hardirq.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <linux/mutex.h>
#define MUTEX_DEFAULT 0
#define MUTEX_SPIN 1
#define MUTEX_ADAPTIVE 2
/*
 * Solaris-style mutex type selector passed as the 'type' argument of
 * mutex_init().  The numeric values are fixed: MUTEX_DEFAULT is 0,
 * MUTEX_SPIN is 1 and MUTEX_ADAPTIVE is 2.
 */
typedef enum {
	MUTEX_DEFAULT = 0,
	MUTEX_SPIN,		/* 1 */
	MUTEX_ADAPTIVE		/* 2 */
} kmutex_type_t;
#define MUTEX_ENTER_TOTAL 0
#define MUTEX_ENTER_NOT_HELD 1
#define MUTEX_ENTER_SPIN 2
#define MUTEX_ENTER_SLEEP 3
#define MUTEX_TRYENTER_TOTAL 4
#define MUTEX_TRYENTER_NOT_HELD 5
#define MUTEX_STATS_SIZE 6
#ifdef HAVE_MUTEX_OWNER
#define KM_MAGIC 0x42424242
#define KM_POISON 0x84
typedef struct mutex kmutex_t;
/*
 * Return the task that currently holds 'mp', or NULL when the native
 * Linux mutex records no owner.
 *
 * The owner field can be changed by other threads at any time as they
 * lock and unlock the mutex.  It is therefore loaded exactly once through
 * a volatile-qualified access and only that snapshot is dereferenced.
 * Testing mp->owner and then reading it a second time would allow the
 * compiler to reload a value that has meanwhile become NULL, leading to
 * a NULL pointer dereference.
 */
static inline kthread_t *
mutex_owner(kmutex_t *mp)
{
	__typeof__(mp->owner) owner;

	/* Single load of the owner; never re-read mp->owner below. */
	owner = *(volatile __typeof__(mp->owner) *)&mp->owner;
	if (owner)
		return owner->task;

	return NULL;
}
/*
 * mutex_owned(mp) is true when the task reported by mutex_owner(mp) is
 * the calling task ('current').  MUTEX_HELD(mp) is an alias for it.
 */
#define mutex_owned(mp) (mutex_owner(mp) == current)
#define MUTEX_HELD(mp) mutex_owned(mp)
/*
 * mutex_init(mp, name, type, ibc) - initialize the native Linux mutex 'mp'.
 *
 * This must stay a macro: every expansion declares its own static
 * lock_class_key, so each call site gets a distinct key to hand to
 * __mutex_init().  The lock is named after the stringified 'mp'
 * expression; the 'name' and 'ibc' arguments are accepted for interface
 * compatibility but are not used here.
 *
 * Only MUTEX_DEFAULT is supported, which is enforced with ASSERT().  The
 * 'type' argument is parenthesized so that an argument containing a
 * lower-precedence operator (for example a conditional expression) is
 * compared as a whole.
 */
#undef mutex_init
#define mutex_init(mp, name, type, ibc)					\
({									\
	static struct lock_class_key __key;				\
	ASSERT((type) == MUTEX_DEFAULT);				\
									\
	__mutex_init((mp), #mp, &__key);				\
})
/*
 * With HAVE_MUTEX_OWNER the Solaris entry points map one-to-one onto the
 * Linux mutex primitives; kmutex_t is struct mutex itself in this branch.
 * NOTE(review): mutex_destroy() is left commented out below, presumably
 * so the kernel's own mutex_destroy() is used unchanged -- confirm.
 */
/* #define mutex_destroy(mp) ((void)0) */
#define mutex_tryenter(mp) mutex_trylock(mp)
#define mutex_enter(mp) mutex_lock(mp)
#define mutex_exit(mp) mutex_unlock(mp)
#else /* HAVE_MUTEX_OWNER */
/*
 * kmutex_t as used when HAVE_MUTEX_OWNER is not defined.
 *
 * NOTE(review): this listing appears to be a rendered diff in which the
 * previous km_* fields and the new m_mutex/m_owner fields are shown
 * together -- confirm the final layout against the real header.  MUTEX()
 * casts a kmutex_t pointer directly to struct mutex *, which is only
 * valid when m_mutex is the first member of the structure.
 */
typedef struct {
int32_t km_magic;
int16_t km_type;
int16_t km_name_size;
char *km_name;
struct task_struct *km_owner;
struct semaphore *km_sem;
#ifdef DEBUG_MUTEX
int *km_stats;
struct list_head km_list;
#endif
struct mutex m_mutex; /* embedded Linux mutex; target of the MUTEX() cast */
kthread_t *m_owner; /* holder recorded by spl_mutex_set_owner(), NULL when cleared */
} kmutex_t;
extern int mutex_spin_max;
/*
 * When HAVE_TASK_CURR is defined spl_mutex_spin_max() is a real function
 * declared here.  Otherwise both task_curr() and spl_mutex_spin_max()
 * are stubbed to 0; the task_curr() stub makes the spin-loop condition
 * in mutex_enter() always false, so it goes straight to mutex_lock().
 */
#ifdef HAVE_TASK_CURR
extern int spl_mutex_spin_max(void);
#else /* HAVE_TASK_CURR */
# define task_curr(owner) 0
# define spl_mutex_spin_max() 0
#endif /* HAVE_TASK_CURR */
/*
 * Optional statistics support.  With DEBUG_MUTEX defined,
 * MUTEX_STAT_INC(stats, stat) increments element 'stat' of the 'stats'
 * array; without it the macro expands to nothing.
 * NOTE(review): the commit message says DEBUG_MUTEX support was removed,
 * so these lines are presumably the deleted side of the diff -- confirm.
 */
#ifdef DEBUG_MUTEX
extern int mutex_stats[MUTEX_STATS_SIZE];
extern spinlock_t mutex_stats_lock;
extern struct list_head mutex_stats_list;
#define MUTEX_STAT_INC(stats, stat) ((stats)[stat]++)
#else
#define MUTEX_STAT_INC(stats, stat)
#endif
/* Reinterpret a kmutex_t pointer as a pointer to a Linux struct mutex. */
#define MUTEX(mp) ((struct mutex *)(mp))
/*
 * Read the recorded owner of 'mp' without taking any lock.  Callers that
 * need a consistent view are expected to serialize the access themselves,
 * as mutex_owner() does by holding wait_lock around this call.
 */
static inline kthread_t *
spl_mutex_get_owner(kmutex_t *mp)
{
	kthread_t *holder = mp->m_owner;

	return holder;
}
/*
 * Record the calling task ('current') as the owner of 'mp'.  The store
 * is made while holding the embedded mutex's wait_lock with interrupts
 * saved and disabled.
 */
static inline void
spl_mutex_set_owner(kmutex_t *mp)
{
	struct mutex *lock = MUTEX(mp);
	unsigned long irq_state;

	spin_lock_irqsave(&lock->wait_lock, irq_state);
	mp->m_owner = current;
	spin_unlock_irqrestore(&lock->wait_lock, irq_state);
}
/*
 * Forget the recorded owner of 'mp' by storing NULL in m_owner.  The
 * store is made while holding the embedded mutex's wait_lock with
 * interrupts saved and disabled.
 */
static inline void
spl_mutex_clear_owner(kmutex_t *mp)
{
	struct mutex *lock = MUTEX(mp);
	unsigned long irq_state;

	spin_lock_irqsave(&lock->wait_lock, irq_state);
	mp->m_owner = NULL;
	spin_unlock_irqrestore(&lock->wait_lock, irq_state);
}
/*
 * Return the thread recorded as the owner of 'mp', or NULL if no owner
 * is recorded.  The value is sampled via spl_mutex_get_owner() while
 * holding the embedded mutex's wait_lock with interrupts saved and
 * disabled, the same lock the set/clear helpers take.
 */
static inline kthread_t *
mutex_owner(kmutex_t *mp)
{
	struct mutex *lock = MUTEX(mp);
	unsigned long irq_state;
	kthread_t *holder;

	spin_lock_irqsave(&lock->wait_lock, irq_state);
	holder = spl_mutex_get_owner(mp);
	spin_unlock_irqrestore(&lock->wait_lock, irq_state);

	return holder;
}
/*
 * mutex_owned(mp) is true when the owner reported by mutex_owner(mp) is
 * the calling task ('current').  MUTEX_HELD(mp) is an alias for it.
 */
#define mutex_owned(mp) (mutex_owner(mp) == current)
#define MUTEX_HELD(mp) mutex_owned(mp)
/*
 * The following functions must be #defines and not static inlines.
 * This ensures that the native Linux mutex functions (lock/unlock)
 * will be correctly located in the user's code, which is important
 * for the built-in kernel lock analysis tools.
 */
/*
 * mutex_init(mp, name, type, ibc) - initialize the kmutex_t 'mp'.
 *
 * This must stay a macro: every expansion declares its own static
 * lock_class_key, so each call site gets a distinct key to hand to
 * __mutex_init().  The embedded Linux mutex is named after the
 * stringified 'mp' expression and the recorded owner is reset to NULL.
 * The 'name' and 'ibc' arguments are accepted for interface
 * compatibility but are not used here.
 *
 * Only MUTEX_DEFAULT is supported, which is enforced with ASSERT().  The
 * 'type' argument is parenthesized so that an argument containing a
 * lower-precedence operator (for example a conditional expression) is
 * compared as a whole.
 */
#undef mutex_init
#define mutex_init(mp, name, type, ibc)					\
({									\
	static struct lock_class_key __key;				\
	ASSERT((type) == MUTEX_DEFAULT);				\
									\
	__mutex_init(MUTEX(mp), #mp, &__key);				\
	spl_mutex_clear_owner(mp);					\
})
/*
 * mutex_destroy(mp) - no resources are released here; the macro only
 * verifies, via VERIFY(), that the calling task is not the recorded
 * holder of 'mp' (MUTEX_HELD compares the owner against 'current').
 */
#undef mutex_destroy
#define mutex_destroy(mp) \
({ \
VERIFY(!MUTEX_HELD(mp)); \
})
/*
 * mutex_tryenter(mp) - attempt to take 'mp' without blocking.
 *
 * Evaluates to the return value of mutex_trylock().  When that value is
 * 1 the calling task is also recorded as the owner.  Kept as a macro so
 * the native mutex_trylock() call is expanded at the caller's location.
 */
#define mutex_tryenter(mp)						\
({									\
	int _rc_ = mutex_trylock(MUTEX(mp));				\
									\
	if (_rc_ == 1)							\
		spl_mutex_set_owner(mp);				\
									\
	_rc_;								\
})
/*
 * Adaptive mutexes assume that the lock may be held by a task running
 * on a different cpu. The expectation is that the task will drop the
 * lock before leaving the head of the run queue. So the ideal thing
 * to do is spin until we acquire the lock and avoid a context switch.
 * However it is also possible the task holding the lock yields the
 * processor without dropping the lock. In this case, we know it's going
 * to be a while so we stop spinning and go to sleep waiting for the
 * lock to be available. This should strike the optimum balance
 * between spinning and sleeping while waiting for a lock.
 */
/*
 * mutex_enter(mp) - acquire 'mp' and record the caller as its owner.
 *
 * The owner is sampled once with mutex_owner().  While that owner is
 * non-NULL, task_curr() is true for it, and _count_ has not exceeded
 * spl_mutex_spin_max(), the macro retries mutex_trylock().  If no
 * trylock succeeded (_rc_ is still 0) it blocks in mutex_lock().  In
 * either case spl_mutex_set_owner() is called once the lock is held.
 *
 * NOTE(review): _owner_ is read before the loop and never refreshed, so
 * the loop keeps testing the original owner even if the lock changes
 * hands in the meantime -- confirm this is intended.
 */
#define mutex_enter(mp) \
({ \
kthread_t *_owner_; \
int _rc_, _count_; \
\
_rc_ = 0; \
_count_ = 0; \
_owner_ = mutex_owner(mp); \
\
while (_owner_ && task_curr(_owner_) && \
_count_ <= spl_mutex_spin_max()) { \
if ((_rc_ = mutex_trylock(MUTEX(mp)))) \
break; \
\
_count_++; \
} \
\
if (!_rc_) \
mutex_lock(MUTEX(mp)); \
\
spl_mutex_set_owner(mp); \
})
/*
 * mutex_exit(mp) - release 'mp'.
 *
 * The recorded owner is cleared first and the Linux mutex is unlocked
 * afterwards.  NOTE(review): the order presumably matters so that a late
 * clear cannot wipe out the owner stored by the next holder -- confirm
 * before reordering.
 */
#define mutex_exit(mp) \
({ \
spl_mutex_clear_owner(mp); \
mutex_unlock(MUTEX(mp)); \
})
#endif /* HAVE_MUTEX_OWNER */
int spl_mutex_init(void);
void spl_mutex_fini(void);
extern int __spl_mutex_init(kmutex_t *mp, char *name, int type, void *ibc);
extern void __spl_mutex_destroy(kmutex_t *mp);
extern int __mutex_tryenter(kmutex_t *mp);
extern void __mutex_enter(kmutex_t *mp);
extern void __mutex_exit(kmutex_t *mp);
extern int __mutex_owned(kmutex_t *mp);
extern kthread_t *__spl_mutex_owner(kmutex_t *mp);
#undef mutex_init
#undef mutex_destroy
/*
 * mutex_init(mp, name, type, ibc) - wrapper around __spl_mutex_init().
 *
 * Calls __spl_mutex_init() repeatedly until it returns zero.  When
 * 'name' is NULL the stringified 'mp' expression is passed as the name
 * instead.
 * NOTE(review): this definition follows the #endif of HAVE_MUTEX_OWNER
 * and looks like the removed side of the diff -- confirm against the
 * real header.
 */
#define mutex_init(mp, name, type, ibc) \
({ \
/* May never fail or all subsequent mutex_* calls will ASSERT */\
if ((name) == NULL) \
while(__spl_mutex_init(mp, #mp, type, ibc)); \
else \
while(__spl_mutex_init(mp, name, type, ibc)); \
})
#define mutex_destroy(mp) __spl_mutex_destroy(mp)
#define mutex_tryenter(mp) __mutex_tryenter(mp)
#define mutex_enter(mp) __mutex_enter(mp)
#define mutex_exit(mp) __mutex_exit(mp)
#define mutex_owned(mp) __mutex_owned(mp)
#define mutex_owner(mp) __spl_mutex_owner(mp)
#define MUTEX_HELD(mp) mutex_owned(mp)
#ifdef __cplusplus
}
#endif
#endif /* _SPL_MUTEX_H */
#endif /* _SPL_MUTEX_H */