From c2fa09454ef322a34df58655978e79c1c7fab641 Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Sun, 13 Jul 2014 14:45:20 -0400 Subject: [PATCH] Add hooks for disabling direct reclaim The port of XFS to Linux introduced a thread-specific PF_FSTRANS bit that is used to mark contexts which are processing transactions. When set, allocations in this context can dip into kernel memory reserves to avoid deadlocks during writeback. Linux 3.9 provided the additional PF_MEMALLOC_NOIO for disabling __GFP_IO in page allocations, which XFS began using in 3.15. This patch implements hooks for marking transactions via PF_FSTRANS. When an allocation is performed in the context of PF_FSTRANS, any KM_SLEEP allocation is transparently converted to a GFP_NOIO allocation. Additionally, when using a Linux 3.9 or newer kernel, it will set PF_MEMALLOC_NOIO to prevent direct reclaim from entering pageout() on any KM_PUSHPAGE or KM_NOSLEEP allocation. This effectively allows the spl_vmalloc() helper function to be used safely in a thread which is responsible for IO. 
Signed-off-by: Brian Behlendorf --- include/sys/kmem.h | 34 ++++++++++++++++++++++++++++++++++ include/sys/vmem.h | 1 + module/spl/spl-kmem-cache.c | 4 +++- module/spl/spl-kmem.c | 2 +- module/spl/spl-vmem.c | 25 +++++++++++++++++++++++++ 5 files changed, 64 insertions(+), 2 deletions(-) diff --git a/include/sys/kmem.h b/include/sys/kmem.h index 045d07c2c..8d5e72937 100644 --- a/include/sys/kmem.h +++ b/include/sys/kmem.h @@ -25,6 +25,7 @@ #ifndef _SPL_KMEM_H #define _SPL_KMEM_H +#include #include #include @@ -72,6 +73,39 @@ kmem_flags_convert(int flags) return (lflags); } +typedef struct { + struct task_struct *fstrans_thread; + unsigned int saved_flags; +} fstrans_cookie_t; + +static inline fstrans_cookie_t +spl_fstrans_mark(void) +{ + fstrans_cookie_t cookie; + + cookie.fstrans_thread = current; + cookie.saved_flags = current->flags & PF_FSTRANS; + current->flags |= PF_FSTRANS; + + return (cookie); +} + +static inline void +spl_fstrans_unmark(fstrans_cookie_t cookie) +{ + ASSERT3P(cookie.fstrans_thread, ==, current); + ASSERT(current->flags & PF_FSTRANS); + + current->flags &= ~(PF_FSTRANS); + current->flags |= cookie.saved_flags; +} + +static inline int +spl_fstrans_check(void) +{ + return (current->flags & PF_FSTRANS); +} + #ifdef HAVE_ATOMIC64_T #define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used) #define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used) diff --git a/include/sys/vmem.h b/include/sys/vmem.h index 6eb2c6769..8aadc9d03 100644 --- a/include/sys/vmem.h +++ b/include/sys/vmem.h @@ -36,6 +36,7 @@ extern vmem_t *zio_alloc_arena; extern vmem_t *zio_arena; extern size_t vmem_size(vmem_t *vmp, int typemask); +extern void *spl_vmalloc(unsigned long size, gfp_t lflags, pgprot_t prot); /* * Memory allocation interfaces diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c index 9a8ccfe42..f8edb44a9 100644 --- a/module/spl/spl-kmem-cache.c +++ b/module/spl/spl-kmem-cache.c @@ -153,7 +153,7 @@ 
kv_alloc(spl_kmem_cache_t *skc, int size, int flags) if (skc->skc_flags & KMC_KMEM) ptr = (void *)__get_free_pages(lflags, get_order(size)); else - ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL); + ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL); /* Resulting allocated memory will be page aligned */ ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE)); @@ -1098,7 +1098,9 @@ spl_cache_grow_work(void *data) sks = spl_slab_alloc(skc, ska->ska_flags); memalloc_noio_restore(noio_flag); #else + fstrans_cookie_t cookie = spl_fstrans_mark(); sks = spl_slab_alloc(skc, ska->ska_flags); + spl_fstrans_unmark(cookie); #endif spin_lock(&skc->skc_lock); if (sks) { diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index 4cd7cdbee..914f0fbf7 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -184,7 +184,7 @@ spl_kmem_alloc_impl(size_t size, int flags, int node) */ if (unlikely(size > spl_kmem_alloc_max)) { if (flags & KM_VMEM) { - ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = spl_vmalloc(size, lflags, PAGE_KERNEL); } else { return (NULL); } diff --git a/module/spl/spl-vmem.c b/module/spl/spl-vmem.c index e177988a7..bca27f263 100644 --- a/module/spl/spl-vmem.c +++ b/module/spl/spl-vmem.c @@ -97,6 +97,31 @@ spl_vmem_free(const void *buf, size_t size) } EXPORT_SYMBOL(spl_vmem_free); +/* + * Public vmalloc() interface designed to be safe to be called during I/O. + */ +void * +spl_vmalloc(unsigned long size, gfp_t lflags, pgprot_t prot) +{ +#if defined(PF_MEMALLOC_NOIO) + void *ptr; + unsigned noio_flag = 0; + + if (spl_fstrans_check()) + noio_flag = memalloc_noio_save(); + + ptr = __vmalloc(size, lflags, prot); + + if (spl_fstrans_check()) + memalloc_noio_restore(noio_flag); + + return (ptr); +#else + return (__vmalloc(size, lflags, prot)); +#endif +} +EXPORT_SYMBOL(spl_vmalloc); + int spl_vmem_init(void) {