mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-14 20:20:26 +03:00
4d54fdee1d
For a generic explanation of why mutexes needed to be reimplemented to work with the kernel lock profiling see commits: e811949a57 and d28db80fd0
The specific changes made to the mutex implementation are as follows. The Linux mutex structure is now directly embedded in the kmutex_t. This allows a kmutex_t to be directly cast to a mutex struct and passed directly to the Linux primitive. Just like with the rwlocks it is critical that these functions be implemented as '#defines' to ensure the location information is preserved. The preprocessor can then do a direct replacement of the Solaris primitive with the Linux primitive. Just as with the rwlocks we need to track the lock owner. Here things get a little more interesting because depending on your kernel version, and how you've built your kernel, Linux may already do this for you. If you're running a 2.6.29 or newer kernel on an SMP system the lock owner will be tracked. This was added to Linux to support adaptive mutexes, more on that shortly. Alternately, your kernel might track the lock owner if you've set CONFIG_DEBUG_MUTEXES in the kernel build. If neither of the above things is true for your kernel the kmutex_t type will include and track the lock owner to ensure correct behavior. This is all handled by a new autoconf check called SPL_AC_MUTEX_OWNER. Concerning adaptive mutexes, these are a very recent development and they did not make it into either the latest FC11 or SLES11 kernels. Ideally, I'd love to see this kernel change appear in one of these distros because it does help performance. From Linux kernel commit: 0d66bf6d3514b35eb6897629059443132992dbd7 "Testing with Ingo's test-mutex application... gave a 345% boost for VFS scalability on my testbox" However, if you don't want to backport this change yourself you can still simply export the task_curr() symbol. The kmutex_t implementation will use this symbol when it's available to provide its own adaptive mutexes. Finally, DEBUG_MUTEX support was removed including the proc handlers.
This was done because now that we are cleanly integrated with the kernel profiling all this information and much much more is available in debug kernel builds. This code was now redundant. Updated mutexes validated on: - SLES10 (ppc64) - SLES11 (x86_64) - CHAOS4.2 (x86_64) - RHEL5.3 (x86_64) - RHEL6 (x86_64) - FC11 (x86_64)
362 lines
12 KiB
C
362 lines
12 KiB
C
/*
 * This file is part of the SPL: Solaris Porting Layer.
 *
 * Copyright (c) 2008 Lawrence Livermore National Security, LLC.
 * Produced at Lawrence Livermore National Laboratory
 * Written by:
 *         Brian Behlendorf <behlendorf1@llnl.gov>,
 *         Herb Wartens <wartens2@llnl.gov>,
 *         Jim Garlick <garlick@llnl.gov>
 * UCRL-CODE-235197
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 */
|
|
|
|
#include "splat-internal.h"
|
|
|
|
/* Name and description under which the mutex test subsystem registers. */
#define SPLAT_MUTEX_NAME		"mutex"
#define SPLAT_MUTEX_DESC		"Kernel Mutex Tests"

/* Test 1: mutex_tryenter() against a held and a free mutex. */
#define SPLAT_MUTEX_TEST1_ID		0x0401
#define SPLAT_MUTEX_TEST1_NAME		"tryenter"
#define SPLAT_MUTEX_TEST1_DESC		"Validate mutex_tryenter() correctness"

/* Test 2: many queued tasks contending for a single mutex. */
#define SPLAT_MUTEX_TEST2_ID		0x0402
#define SPLAT_MUTEX_TEST2_NAME		"race"
#define SPLAT_MUTEX_TEST2_DESC		"Many threads entering/exiting the mutex"

/* Test 3: mutex_owned() while holding and after releasing a mutex. */
#define SPLAT_MUTEX_TEST3_ID		0x0403
#define SPLAT_MUTEX_TEST3_NAME		"owned"
#define SPLAT_MUTEX_TEST3_DESC		"Validate mutex_owned() correctness"

/* Test 4: mutex_owner() while holding and after releasing a mutex. */
#define SPLAT_MUTEX_TEST4_ID		0x0404
#define SPLAT_MUTEX_TEST4_NAME		"owner"
#define SPLAT_MUTEX_TEST4_DESC		"Validate mutex_owner() correctness"

/*
 * Shared test parameters:
 *   MAGIC - tag stored in mutex_priv_t.mp_magic and asserted by the task
 *           functions before they touch the private data.
 *   NAME  - name argument handed to mutex_init() by every test.
 *   TASKQ - name argument handed to taskq_create().
 *   COUNT - number of tasks the race test dispatches.
 */
#define SPLAT_MUTEX_TEST_MAGIC		0x115599DDUL
#define SPLAT_MUTEX_TEST_NAME		"mutex_test"
#define SPLAT_MUTEX_TEST_TASKQ		"mutex_taskq"
#define SPLAT_MUTEX_TEST_COUNT		128
|
|
|
|
typedef struct mutex_priv {
|
|
unsigned long mp_magic;
|
|
struct file *mp_file;
|
|
kmutex_t mp_mtx;
|
|
int mp_rc;
|
|
} mutex_priv_t;
|
|
|
|
static void
|
|
splat_mutex_test1_func(void *arg)
|
|
{
|
|
mutex_priv_t *mp = (mutex_priv_t *)arg;
|
|
ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC);
|
|
|
|
if (mutex_tryenter(&mp->mp_mtx)) {
|
|
mp->mp_rc = 0;
|
|
mutex_exit(&mp->mp_mtx);
|
|
} else {
|
|
mp->mp_rc = -EBUSY;
|
|
}
|
|
}
|
|
|
|
static int
|
|
splat_mutex_test1(struct file *file, void *arg)
|
|
{
|
|
mutex_priv_t *mp;
|
|
taskq_t *tq;
|
|
int id, rc = 0;
|
|
|
|
mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL);
|
|
if (mp == NULL)
|
|
return -ENOMEM;
|
|
|
|
tq = taskq_create(SPLAT_MUTEX_TEST_TASKQ, 1, maxclsyspri,
|
|
50, INT_MAX, TASKQ_PREPOPULATE);
|
|
if (tq == NULL) {
|
|
rc = -ENOMEM;
|
|
goto out2;
|
|
}
|
|
|
|
mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC;
|
|
mp->mp_file = file;
|
|
mutex_init(&mp->mp_mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL);
|
|
mutex_enter(&mp->mp_mtx);
|
|
|
|
/*
|
|
* Schedule a task function which will try and acquire the mutex via
|
|
* mutex_tryenter() while it's held. This should fail and the task
|
|
* function will indicate this status in the passed private data.
|
|
*/
|
|
mp->mp_rc = -EINVAL;
|
|
id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP);
|
|
if (id == 0) {
|
|
mutex_exit(&mp->mp_mtx);
|
|
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s",
|
|
"taskq_dispatch() failed\n");
|
|
rc = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
taskq_wait_id(tq, id);
|
|
mutex_exit(&mp->mp_mtx);
|
|
|
|
/* Task function successfully acquired mutex, very bad! */
|
|
if (mp->mp_rc != -EBUSY) {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME,
|
|
"mutex_trylock() incorrectly succeeded when "
|
|
"the mutex was held, %d/%d\n", id, mp->mp_rc);
|
|
rc = -EINVAL;
|
|
goto out;
|
|
} else {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s",
|
|
"mutex_trylock() correctly failed when "
|
|
"the mutex was held\n");
|
|
}
|
|
|
|
/*
|
|
* Schedule a task function which will try and acquire the mutex via
|
|
* mutex_tryenter() while it is not held. This should succeed and
|
|
* can be verified by checking the private data.
|
|
*/
|
|
mp->mp_rc = -EINVAL;
|
|
id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP);
|
|
if (id == 0) {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s",
|
|
"taskq_dispatch() failed\n");
|
|
rc = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
taskq_wait_id(tq, id);
|
|
|
|
/* Task function failed to acquire mutex, very bad! */
|
|
if (mp->mp_rc != 0) {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME,
|
|
"mutex_trylock() incorrectly failed when "
|
|
"the mutex was not held, %d/%d\n", id, mp->mp_rc);
|
|
rc = -EINVAL;
|
|
} else {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s",
|
|
"mutex_trylock() correctly succeeded "
|
|
"when the mutex was not held\n");
|
|
}
|
|
out:
|
|
taskq_destroy(tq);
|
|
mutex_destroy(&(mp->mp_mtx));
|
|
out2:
|
|
kfree(mp);
|
|
return rc;
|
|
}
|
|
|
|
static void
|
|
splat_mutex_test2_func(void *arg)
|
|
{
|
|
mutex_priv_t *mp = (mutex_priv_t *)arg;
|
|
int rc;
|
|
ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC);
|
|
|
|
/* Read the value before sleeping and write it after we wake up to
|
|
* maximize the chance of a race if mutexs are not working properly */
|
|
mutex_enter(&mp->mp_mtx);
|
|
rc = mp->mp_rc;
|
|
set_current_state(TASK_INTERRUPTIBLE);
|
|
schedule_timeout(HZ / 100); /* 1/100 of a second */
|
|
VERIFY(mp->mp_rc == rc);
|
|
mp->mp_rc = rc + 1;
|
|
mutex_exit(&mp->mp_mtx);
|
|
}
|
|
|
|
static int
|
|
splat_mutex_test2(struct file *file, void *arg)
|
|
{
|
|
mutex_priv_t *mp;
|
|
taskq_t *tq;
|
|
int i, rc = 0;
|
|
|
|
mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL);
|
|
if (mp == NULL)
|
|
return -ENOMEM;
|
|
|
|
/* Create several threads allowing tasks to race with each other */
|
|
tq = taskq_create(SPLAT_MUTEX_TEST_TASKQ, num_online_cpus(),
|
|
maxclsyspri, 50, INT_MAX, TASKQ_PREPOPULATE);
|
|
if (tq == NULL) {
|
|
rc = -ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
mp->mp_magic = SPLAT_MUTEX_TEST_MAGIC;
|
|
mp->mp_file = file;
|
|
mutex_init(&(mp->mp_mtx), SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL);
|
|
mp->mp_rc = 0;
|
|
|
|
/*
|
|
* Schedule N work items to the work queue each of which enters the
|
|
* mutex, sleeps briefly, then exits the mutex. On a multiprocessor
|
|
* box these work items will be handled by all available CPUs. The
|
|
* task function checks to ensure the tracked shared variable is
|
|
* always only incremented by one. Additionally, the mutex itself
|
|
* is instrumented such that if any two processors are in the
|
|
* critical region at the same time the system will panic. If the
|
|
* mutex is implemented right this will never happy, that's a pass.
|
|
*/
|
|
for (i = 0; i < SPLAT_MUTEX_TEST_COUNT; i++) {
|
|
if (!taskq_dispatch(tq, splat_mutex_test2_func, mp, TQ_SLEEP)) {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST2_NAME,
|
|
"Failed to queue task %d\n", i);
|
|
rc = -EINVAL;
|
|
}
|
|
}
|
|
|
|
taskq_wait(tq);
|
|
|
|
if (mp->mp_rc == SPLAT_MUTEX_TEST_COUNT) {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads "
|
|
"correctly entered/exited the mutex %d times\n",
|
|
num_online_cpus(), mp->mp_rc);
|
|
} else {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST2_NAME, "%d racing threads "
|
|
"only processed %d/%d mutex work items\n",
|
|
num_online_cpus(),mp->mp_rc,SPLAT_MUTEX_TEST_COUNT);
|
|
rc = -EINVAL;
|
|
}
|
|
|
|
taskq_destroy(tq);
|
|
mutex_destroy(&(mp->mp_mtx));
|
|
out:
|
|
kfree(mp);
|
|
return rc;
|
|
}
|
|
|
|
static int
|
|
splat_mutex_test3(struct file *file, void *arg)
|
|
{
|
|
kmutex_t mtx;
|
|
int rc = 0;
|
|
|
|
mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL);
|
|
mutex_enter(&mtx);
|
|
|
|
/* Mutex should be owned by current */
|
|
if (!mutex_owned(&mtx)) {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Unowned mutex "
|
|
"should be owned by pid %d\n", current->pid);
|
|
rc = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
mutex_exit(&mtx);
|
|
|
|
/* Mutex should not be owned by any task */
|
|
if (mutex_owned(&mtx)) {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex owned by "
|
|
"pid %d should be unowned\b", current->pid);
|
|
rc = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "%s",
|
|
"Correct mutex_owned() behavior\n");
|
|
out:
|
|
mutex_destroy(&mtx);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static int
|
|
splat_mutex_test4(struct file *file, void *arg)
|
|
{
|
|
kmutex_t mtx;
|
|
kthread_t *owner;
|
|
int rc = 0;
|
|
|
|
mutex_init(&mtx, SPLAT_MUTEX_TEST_NAME, MUTEX_DEFAULT, NULL);
|
|
mutex_enter(&mtx);
|
|
|
|
/* Mutex should be owned by current */
|
|
owner = mutex_owner(&mtx);
|
|
if (current != owner) {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should "
|
|
"be owned by pid %d but is owned by pid %d\n",
|
|
current->pid, owner ? owner->pid : -1);
|
|
rc = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
mutex_exit(&mtx);
|
|
|
|
/* Mutex should not be owned by any task */
|
|
owner = mutex_owner(&mtx);
|
|
if (owner) {
|
|
splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Mutex should not "
|
|
"be owned but is owned by pid %d\n", owner->pid);
|
|
rc = -EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "%s",
|
|
"Correct mutex_owner() behavior\n");
|
|
out:
|
|
mutex_destroy(&mtx);
|
|
|
|
return rc;
|
|
}
|
|
|
|
splat_subsystem_t *
|
|
splat_mutex_init(void)
|
|
{
|
|
splat_subsystem_t *sub;
|
|
|
|
sub = kmalloc(sizeof(*sub), GFP_KERNEL);
|
|
if (sub == NULL)
|
|
return NULL;
|
|
|
|
memset(sub, 0, sizeof(*sub));
|
|
strncpy(sub->desc.name, SPLAT_MUTEX_NAME, SPLAT_NAME_SIZE);
|
|
strncpy(sub->desc.desc, SPLAT_MUTEX_DESC, SPLAT_DESC_SIZE);
|
|
INIT_LIST_HEAD(&sub->subsystem_list);
|
|
INIT_LIST_HEAD(&sub->test_list);
|
|
spin_lock_init(&sub->test_lock);
|
|
sub->desc.id = SPLAT_SUBSYSTEM_MUTEX;
|
|
|
|
SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST1_NAME, SPLAT_MUTEX_TEST1_DESC,
|
|
SPLAT_MUTEX_TEST1_ID, splat_mutex_test1);
|
|
SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST2_NAME, SPLAT_MUTEX_TEST2_DESC,
|
|
SPLAT_MUTEX_TEST2_ID, splat_mutex_test2);
|
|
SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST3_NAME, SPLAT_MUTEX_TEST3_DESC,
|
|
SPLAT_MUTEX_TEST3_ID, splat_mutex_test3);
|
|
SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST4_NAME, SPLAT_MUTEX_TEST4_DESC,
|
|
SPLAT_MUTEX_TEST4_ID, splat_mutex_test4);
|
|
|
|
return sub;
|
|
}
|
|
|
|
void
|
|
splat_mutex_fini(splat_subsystem_t *sub)
|
|
{
|
|
ASSERT(sub);
|
|
SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST4_ID);
|
|
SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST3_ID);
|
|
SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST2_ID);
|
|
SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST1_ID);
|
|
|
|
kfree(sub);
|
|
}
|
|
|
|
int
|
|
splat_mutex_id(void) {
|
|
return SPLAT_SUBSYSTEM_MUTEX;
|
|
}
|