Add support for rw semaphore under PREEMPT_RT_FULL

The main complication from the RT patch set is that the RW semaphore
locks change such that read locks on an rwsem can be taken only by
a single thread.  All other threads are locked out. This single
thread can take a read lock multiple times though. The underlying
implementation changes to a mutex with an additional read_depth
count.

The implementation can be best understood by inspecting the RT
patch.  rwsem_rt.h and rt.c give the best insight into how RT
rwsem works. My implementation for rwsem_tryupgrade is basically
an inversion of rt_downgrade_write found in rt.c. Please see the
comments in the code.

Unfortunately, I have to disable SPLAT rwlock test1 completely as this
test tries to take the lock from multiple threads at once, which RT
rwsems do not support.  SPLAT rwlock test4 is adjusted to expect
-EBUSY when a second thread tries to take a concurrent read lock.
Otherwise SPLAT, zconfig.sh, zpios-sanity.sh and zfs-tests.sh pass on
my Debian-testing VM with the kernel linux-image-4.8.0-1-rt-amd64.

Tested-by: kernelOfTruth <kerneloftruth@gmail.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Clemens Fruhwirth <clemens@endorphin.org>
Closes zfsonlinux/zfs#5491
Closes #589
Closes #308
This commit is contained in:
Clemens Fruhwirth 2016-12-17 17:09:57 +01:00 committed by Brian Behlendorf
parent 6d064f7a07
commit 8e99d66b05
3 changed files with 62 additions and 4 deletions

View File

@ -27,7 +27,10 @@
#include <linux/rwsem.h> #include <linux/rwsem.h>
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK #if defined(CONFIG_PREEMPT_RT_FULL)
/*
 * Under PREEMPT_RT_FULL, RWSEM_COUNT() reads the rwsem's read_depth
 * nesting counter.  A write lock is represented as read_depth == 0,
 * and a read lock taken exactly once by the owning thread is
 * presumably read_depth == 1 (NOTE(review): confirm against
 * rwsem_rt.h in the RT patch set).
 */
#define SPL_RWSEM_SINGLE_READER_VALUE (1)
#define SPL_RWSEM_SINGLE_WRITER_VALUE (0)
#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
#define SPL_RWSEM_SINGLE_READER_VALUE (1) #define SPL_RWSEM_SINGLE_READER_VALUE (1)
#define SPL_RWSEM_SINGLE_WRITER_VALUE (-1) #define SPL_RWSEM_SINGLE_WRITER_VALUE (-1)
#else #else
@ -36,7 +39,9 @@
#endif #endif
/* Linux 3.16 changed activity to count for rwsem-spinlock */ /* Linux 3.16 changed activity to count for rwsem-spinlock */
#if defined(HAVE_RWSEM_ACTIVITY) #if defined(CONFIG_PREEMPT_RT_FULL)
/*
 * PREEMPT_RT_FULL rwsems expose their state through the read_depth
 * nesting counter rather than activity/count.  The argument and the
 * whole expansion are parenthesized so that any pointer expression
 * (e.g. "base + i") may be passed safely; the result is still an
 * lvalue.
 */
#define RWSEM_COUNT(sem) ((sem)->read_depth)
#elif defined(HAVE_RWSEM_ACTIVITY)
#define RWSEM_COUNT(sem) sem->activity #define RWSEM_COUNT(sem) sem->activity
/* Linux 4.8 changed count to an atomic_long_t for !rwsem-spinlock */ /* Linux 4.8 changed count to an atomic_long_t for !rwsem-spinlock */
#elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT) #elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT)

View File

@ -32,7 +32,37 @@
#define DEBUG_SUBSYSTEM S_RWLOCK #define DEBUG_SUBSYSTEM S_RWLOCK
#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK) #if defined(CONFIG_PREEMPT_RT_FULL)
#include <linux/rtmutex.h>
/*
 * Try to upgrade a read lock held by the current thread to a write
 * lock on a PREEMPT_RT_FULL kernel.
 *
 * With the realtime patch series an rwsem is a single mutex shared by
 * readers and writers.  Since a plain mutex would block a thread that
 * read-locks the same rwsem twice, the RT implementation lets the
 * owning thread nest read locks and records the nesting level in
 * read_depth.
 *
 * Returns 1 when the upgrade succeeded and 0 when it was refused.
 */
static int
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
	/* The caller must already own the underlying rt mutex. */
	ASSERT(rt_mutex_owner(&rwsem->lock) == current);

	/*
	 * The read lock is nested more than once by this thread, so it
	 * cannot be converted into a write lock.
	 */
	if (rwsem->read_depth > 1)
		return (0);

	/*
	 * The lock was taken at most once as a read lock.  rwsem_rt.h
	 * represents a write lock as read_depth == 0, so clearing the
	 * counter completes the upgrade.
	 */
	rwsem->read_depth = 0;

	return (1);
}
#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
static int static int
__rwsem_tryupgrade(struct rw_semaphore *rwsem) __rwsem_tryupgrade(struct rw_semaphore *rwsem)
{ {

View File

@ -106,6 +106,17 @@ void splat_init_rw_priv(rw_priv_t *rwp, struct file *file)
rwp->rw_type = 0; rwp->rw_type = 0;
} }
#if defined(CONFIG_PREEMPT_RT_FULL)
/*
 * Placeholder for rwlock test 1 on PREEMPT_RT_FULL kernels.
 *
 * The real test cannot succeed there because these kernels only
 * allow a single thread to hold the lock, so this stand-in does no
 * work and unconditionally reports success (0).
 */
static int
splat_rwlock_test1(struct file *file, void *arg)
{
	/* Neither argument is needed by the stub. */
	(void) file;
	(void) arg;

	return 0;
}
#else
static int static int
splat_rwlock_wr_thr(void *arg) splat_rwlock_wr_thr(void *arg)
{ {
@ -297,6 +308,7 @@ splat_rwlock_test1(struct file *file, void *arg)
return rc; return rc;
} }
#endif
static void static void
splat_rwlock_test2_func(void *arg) splat_rwlock_test2_func(void *arg)
@ -514,11 +526,22 @@ splat_rwlock_test4(struct file *file, void *arg)
splat_init_rw_priv(rwp, file); splat_init_rw_priv(rwp, file);
/* Validate all combinations of rw_tryenter() contention */ /*
* Validate all combinations of rw_tryenter() contention.
*
* The concurrent reader test is modified for PREEMPT_RT_FULL
* kernels which do not permit concurrent read locks to be taken
* from different threads. The same thread is allowed to take
* the read lock multiple times.
*/
rc1 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_WRITER); rc1 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_WRITER);
rc2 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_READER); rc2 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_READER);
rc3 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_READER, RW_WRITER); rc3 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_READER, RW_WRITER);
#if defined(CONFIG_PREEMPT_RT_FULL)
rc4 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_READER, RW_READER);
#else
rc4 = splat_rwlock_test4_type(tq, rwp, 0, RW_READER, RW_READER); rc4 = splat_rwlock_test4_type(tq, rwp, 0, RW_READER, RW_READER);
#endif
rc5 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_WRITER); rc5 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_WRITER);
rc6 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_READER); rc6 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_READER);