From 8e99d66b0555fe3d6f5b028e8f03883dbf1399bc Mon Sep 17 00:00:00 2001
From: Clemens Fruhwirth
Date: Sat, 17 Dec 2016 17:09:57 +0100
Subject: [PATCH] Add support for rw semaphore under PREEMPT_RT_FULL

The main complication from the RT patch set is that the RW semaphore
locks change such that read locks on an rwsem can be taken only by a
single thread. All other threads are locked out. This single thread
can take a read lock multiple times though. The underlying
implementation changes to a mutex with an additional read_depth count.

The implementation is best understood by inspecting the RT patch;
rwsem_rt.h and rt.c give the best insight into how RT rwsems work. My
implementation for rwsem_tryupgrade is basically an inversion of
rt_downgrade_write found in rt.c. Please see the comments in the code.

Unfortunately, I have to drop SPLAT rwlock test1 completely, as it
takes multiple read locks from different threads, which RT rwsems do
not support, and adjust test4 to expect -EBUSY for its concurrent
reader case. Otherwise SPLAT, zconfig.sh, zpios-sanity.sh and
zfs-tests.sh pass on my Debian-testing VM with the kernel
linux-image-4.8.0-1-rt-amd64.

Tested-by: kernelOfTruth
Reviewed-by: Brian Behlendorf
Signed-off-by: Clemens Fruhwirth
Closes zfsonlinux/zfs#5491
Closes #589
Closes #308
---
 include/linux/rwsem_compat.h |  9 +++++++--
 module/spl/spl-rwlock.c      | 32 +++++++++++++++++++++++++++++++-
 module/splat/splat-rwlock.c  | 25 ++++++++++++++++++++++++-
 3 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/include/linux/rwsem_compat.h b/include/linux/rwsem_compat.h
index c874885b0..de513debe 100644
--- a/include/linux/rwsem_compat.h
+++ b/include/linux/rwsem_compat.h
@@ -27,7 +27,10 @@
 
 #include <linux/rwsem.h>
 
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+#if defined(CONFIG_PREEMPT_RT_FULL)
+#define SPL_RWSEM_SINGLE_READER_VALUE	(1)
+#define SPL_RWSEM_SINGLE_WRITER_VALUE	(0)
+#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
 #define SPL_RWSEM_SINGLE_READER_VALUE	(1)
 #define SPL_RWSEM_SINGLE_WRITER_VALUE	(-1)
 #else
@@ -36,7 +39,9 @@
 #endif
 
 /* Linux 3.16 changed activity to count for rwsem-spinlock */
-#if defined(HAVE_RWSEM_ACTIVITY)
+#if defined(CONFIG_PREEMPT_RT_FULL)
+#define RWSEM_COUNT(sem)	sem->read_depth
+#elif defined(HAVE_RWSEM_ACTIVITY)
 #define RWSEM_COUNT(sem)	sem->activity
 /* Linux 4.8 changed count to an atomic_long_t for !rwsem-spinlock */
 #elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT)
diff --git a/module/spl/spl-rwlock.c b/module/spl/spl-rwlock.c
index 77f46f2d6..9e96c4f27 100644
--- a/module/spl/spl-rwlock.c
+++ b/module/spl/spl-rwlock.c
@@ -32,7 +32,37 @@
 
 #define DEBUG_SUBSYSTEM S_RWLOCK
 
-#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
+#if defined(CONFIG_PREEMPT_RT_FULL)
+
+#include <linux/rtmutex.h>
+
+static int
+__rwsem_tryupgrade(struct rw_semaphore *rwsem)
+{
+	ASSERT(rt_mutex_owner(&rwsem->lock) == current);
+
+	/*
+	 * Under the realtime patch series, rwsem is implemented as a
+	 * single mutex held by readers and writers alike. However,
+	 * this implementation would prevent a thread from taking a
+	 * read lock twice, as the mutex would already be locked on
+	 * the second attempt. Therefore the implementation allows a
+	 * single thread to take an rwsem as a read lock multiple
+	 * times, tracking that nesting as the read_depth counter.
+	 */
+	if (rwsem->read_depth <= 1) {
+		/*
+		 * Since the current thread has not taken the lock
+		 * more than once as a read lock, we can allow an
+		 * upgrade to a write lock. rwsem_rt.h implements
+		 * write locks as read_depth == 0.
+		 */
+		rwsem->read_depth = 0;
+		return (1);
+	}
+	return (0);
+}
+#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
 static int
 __rwsem_tryupgrade(struct rw_semaphore *rwsem)
 {
diff --git a/module/splat/splat-rwlock.c b/module/splat/splat-rwlock.c
index c11ab94f2..d7626b236 100644
--- a/module/splat/splat-rwlock.c
+++ b/module/splat/splat-rwlock.c
@@ -106,6 +106,17 @@ void splat_init_rw_priv(rw_priv_t *rwp, struct file *file)
 	rwp->rw_type = 0;
 }
 
+#if defined(CONFIG_PREEMPT_RT_FULL)
+static int
+splat_rwlock_test1(struct file *file, void *arg)
+{
+	/*
+	 * This test will never succeed on PREEMPT_RT_FULL because these
+	 * kernels only allow a single thread to hold the lock.
+	 */
+	return 0;
+}
+#else
 static int
 splat_rwlock_wr_thr(void *arg)
 {
@@ -297,6 +308,7 @@ splat_rwlock_test1(struct file *file, void *arg)
 
 	return rc;
 }
+#endif
 
 static void
 splat_rwlock_test2_func(void *arg)
@@ -514,11 +526,22 @@ splat_rwlock_test4(struct file *file, void *arg)
 
 	splat_init_rw_priv(rwp, file);
 
-	/* Validate all combinations of rw_tryenter() contention */
+	/*
+	 * Validate all combinations of rw_tryenter() contention.
+	 *
+	 * The concurrent reader test is modified for PREEMPT_RT_FULL
+	 * kernels, which do not permit concurrent read locks to be taken
+	 * from different threads. The same thread is allowed to take
+	 * the read lock multiple times.
+	 */
 	rc1 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_WRITER);
 	rc2 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_READER);
 	rc3 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_READER, RW_WRITER);
+#if defined(CONFIG_PREEMPT_RT_FULL)
+	rc4 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_READER, RW_READER);
+#else
 	rc4 = splat_rwlock_test4_type(tq, rwp, 0, RW_READER, RW_READER);
+#endif
 	rc5 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_WRITER);
 	rc6 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_READER);
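
For readers unfamiliar with the RT rwsem behavior this patch targets, the
sketch below models the single-reader rules in user space. It is
illustrative only: the model_rwsem_t type and model_* functions are
invented names, the pthread mutex merely stands in for the kernel's
rt_mutex, and the demo runs single-threaded, so the locking and
memory-ordering details of the real kernel code are deliberately ignored.

	/*
	 * Minimal user-space model of the single-reader RT rwsem
	 * semantics described above. All names here are hypothetical;
	 * only the control flow of __rwsem_tryupgrade() is preserved.
	 */
	#include <assert.h>
	#include <pthread.h>
	#include <stdio.h>

	typedef struct model_rwsem {
		pthread_mutex_t	lock;		/* stands in for the rt_mutex */
		pthread_t	owner;		/* thread holding the mutex */
		int		read_depth;	/* 0 == writer, >0 == reader nesting */
	} model_rwsem_t;

	static void
	model_read_lock(model_rwsem_t *rwsem)
	{
		/* A nested read by the owning thread only bumps the count. */
		if (rwsem->read_depth > 0 &&
		    pthread_equal(rwsem->owner, pthread_self())) {
			rwsem->read_depth++;
			return;
		}
		/* The first read lock takes the mutex, excluding all others. */
		pthread_mutex_lock(&rwsem->lock);
		rwsem->owner = pthread_self();
		rwsem->read_depth = 1;
	}

	static int
	model_tryupgrade(model_rwsem_t *rwsem)
	{
		assert(pthread_equal(rwsem->owner, pthread_self()));
		/* Mirrors __rwsem_tryupgrade(): upgrade only without nesting. */
		if (rwsem->read_depth <= 1) {
			rwsem->read_depth = 0;	/* write lock == read_depth 0 */
			return (1);
		}
		return (0);
	}

	int
	main(void)
	{
		model_rwsem_t rwsem = { PTHREAD_MUTEX_INITIALIZER };

		model_read_lock(&rwsem);
		model_read_lock(&rwsem);	/* nested read, read_depth == 2 */
		printf("upgrade while nested: %d\n", model_tryupgrade(&rwsem));
		rwsem.read_depth--;		/* drop the nested read lock */
		printf("upgrade at depth 1:  %d\n", model_tryupgrade(&rwsem));
		pthread_mutex_unlock(&rwsem.lock);	/* drop the write lock */
		return (0);
	}

The first upgrade attempt prints 0 and the second prints 1. In the real
patch the owner check is ASSERT(rt_mutex_owner(&rwsem->lock) == current),
and read_depth lives inside the RT patch's struct rw_semaphore rather
than in a wrapper type.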