mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 18:34:22 +03:00
Refactor generic memory allocation interfaces
This patch achieves the following goals: 1. It replaces the preprocessor kmem flag to gfp flag mapping with proper translation logic. This eliminates the potential for surprises that were previously possible where kmem flags were mapped to gfp flags. 2. It maps vmem_alloc() allocations to kmem_alloc() for allocations sized less than or equal to the newly-added spl_kmem_alloc_max parameter. This ensures that small allocations will not contend on a single global lock, large allocations can still be handled, and potentially limited virtual address space will not be squandered. This behavior is entirely different than under Illumos due to different memory management strategies employed by the respective kernels. However, this functionally provides the semantics required. 3. The --disable-debug-kmem, --enable-debug-kmem (default), and --enable-debug-kmem-tracking allocators have been unified into a single spl_kmem_alloc_impl() allocation function. This was done to simplify the code and make it more maintainable. 4. Improve portability by exposing an implementation of the memory allocation functions that can be safely used in the same way they are used on Illumos. Specifically, callers may safely use KM_SLEEP in contexts which perform filesystem IO. This allows us to eliminate an entire class of Linux-specific changes which were previously required to avoid deadlocking the system. This change will be largely transparent to existing callers but there are a few caveats: 1. Because the headers were refactored and extraneous includes removed, callers may find they need to explicitly add additional #includes. In particular, kmem_cache.h must now be explicitly included to access the SPL's kmem cache implementation. This behavior is different from Illumos but it was done to avoid always masking the Linux slab functions when kmem.h is included. 2. Callers, like Lustre, which made assumptions about the definitions of KM_SLEEP, KM_NOSLEEP, and KM_PUSHPAGE will need to be updated.
Other callers, such as ZFS, which did not will not require changes. 3. KM_PUSHPAGE is no longer overloaded to imply GFP_NOIO. It retains its original meaning of allowing allocations to access reserved memory. KM_PUSHPAGE callers can be converted back to KM_SLEEP. 4. The KM_NODEBUG flag has been retired and the default warning threshold increased to 32k. 5. The kmem_virt() function has been removed. For callers which need to distinguish between a physical and virtual address use is_vmalloc_addr(). Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
parent
b34b95635a
commit
c3eabc75b1
@ -26,6 +26,7 @@
|
||||
#define _SPL_KMEM_H
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
extern int kmem_debugging(void);
|
||||
extern char *kmem_vasprintf(const char *fmt, va_list ap);
|
||||
@ -36,68 +37,41 @@ extern void strfree(char *str);
|
||||
/*
|
||||
* Memory allocation interfaces
|
||||
*/
|
||||
#define KM_SLEEP GFP_KERNEL /* Can sleep, never fails */
|
||||
#define KM_NOSLEEP GFP_ATOMIC /* Can not sleep, may fail */
|
||||
#define KM_PUSHPAGE (GFP_NOIO | __GFP_HIGH) /* Use reserved memory */
|
||||
#define KM_NODEBUG __GFP_NOWARN /* Suppress warnings */
|
||||
#define KM_FLAGS __GFP_BITS_MASK
|
||||
#define KM_VMFLAGS GFP_LEVEL_MASK
|
||||
#define KM_SLEEP 0x0000 /* can block for memory; success guaranteed */
|
||||
#define KM_NOSLEEP 0x0001 /* cannot block for memory; may fail */
|
||||
#define KM_PUSHPAGE 0x0004 /* can block for memory; may use reserve */
|
||||
#define KM_ZERO 0x1000 /* zero the allocation */
|
||||
#define KM_VMEM 0x2000 /* caller is vmem_* wrapper */
|
||||
|
||||
#define KM_PUBLIC_MASK (KM_SLEEP | KM_NOSLEEP | KM_PUSHPAGE)
|
||||
|
||||
/*
|
||||
* Used internally, the kernel does not need to support this flag
|
||||
* Convert a KM_* flags mask to its Linux GFP_* counterpart. The conversion
|
||||
* function is context aware which means that KM_SLEEP allocations can be
|
||||
* safely used in syncing contexts which have set PF_FSTRANS.
|
||||
*/
|
||||
#ifndef __GFP_ZERO
|
||||
#define __GFP_ZERO 0x8000
|
||||
#endif
|
||||
|
||||
/*
|
||||
* __GFP_NOFAIL looks like it will be removed from the kernel perhaps as
|
||||
* early as 2.6.32. To avoid this issue when it occurs in upstream kernels
|
||||
* we retry the allocation here as long as it is not __GFP_WAIT (GFP_ATOMIC).
|
||||
* I would prefer the caller handle the failure case cleanly but we are
|
||||
* trying to emulate Solaris and those are not the Solaris semantics.
|
||||
*/
|
||||
static inline void *
|
||||
kmalloc_nofail(size_t size, gfp_t flags)
|
||||
static inline gfp_t
|
||||
kmem_flags_convert(int flags)
|
||||
{
|
||||
void *ptr;
|
||||
gfp_t lflags = __GFP_NOWARN | __GFP_COMP;
|
||||
|
||||
do {
|
||||
ptr = kmalloc(size, flags);
|
||||
} while (ptr == NULL && (flags & __GFP_WAIT));
|
||||
if (flags & KM_NOSLEEP) {
|
||||
lflags |= GFP_ATOMIC | __GFP_NORETRY;
|
||||
} else {
|
||||
lflags |= GFP_KERNEL;
|
||||
if ((current->flags & PF_FSTRANS))
|
||||
lflags &= ~(__GFP_IO|__GFP_FS);
|
||||
}
|
||||
|
||||
return (ptr);
|
||||
if (flags & KM_PUSHPAGE)
|
||||
lflags |= __GFP_HIGH;
|
||||
|
||||
if (flags & KM_ZERO)
|
||||
lflags |= __GFP_ZERO;
|
||||
|
||||
return (lflags);
|
||||
}
|
||||
|
||||
static inline void *
|
||||
kzalloc_nofail(size_t size, gfp_t flags)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
do {
|
||||
ptr = kzalloc(size, flags);
|
||||
} while (ptr == NULL && (flags & __GFP_WAIT));
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
|
||||
static inline void *
|
||||
kmalloc_node_nofail(size_t size, gfp_t flags, int node)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
do {
|
||||
ptr = kmalloc_node(size, flags, node);
|
||||
} while (ptr == NULL && (flags & __GFP_WAIT));
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_KMEM
|
||||
|
||||
/*
|
||||
* Memory accounting functions to be used only when DEBUG_KMEM is set.
|
||||
*/
|
||||
#ifdef HAVE_ATOMIC64_T
|
||||
#define kmem_alloc_used_add(size) atomic64_add(size, &kmem_alloc_used)
|
||||
#define kmem_alloc_used_sub(size) atomic64_sub(size, &kmem_alloc_used)
|
||||
@ -114,70 +88,29 @@ extern atomic_t kmem_alloc_used;
|
||||
extern unsigned long long kmem_alloc_max;
|
||||
#endif /* HAVE_ATOMIC64_T */
|
||||
|
||||
#ifdef DEBUG_KMEM_TRACKING
|
||||
extern unsigned int spl_kmem_alloc_warn;
|
||||
extern unsigned int spl_kmem_alloc_max;
|
||||
|
||||
#define kmem_alloc(sz, fl) spl_kmem_alloc((sz), (fl), __func__, __LINE__)
|
||||
#define kmem_zalloc(sz, fl) spl_kmem_zalloc((sz), (fl), __func__, __LINE__)
|
||||
#define kmem_free(ptr, sz) spl_kmem_free((ptr), (sz))
|
||||
|
||||
extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line);
|
||||
extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line);
|
||||
extern void spl_kmem_free(const void *ptr, size_t sz);
|
||||
|
||||
/*
|
||||
* DEBUG_KMEM && DEBUG_KMEM_TRACKING
|
||||
*
|
||||
* The maximum level of memory debugging. All memory will be accounted
|
||||
* for and each allocation will be explicitly tracked. Any allocation
|
||||
* which is leaked will be reported on module unload and the exact location
|
||||
* where that memory was allocated will be reported. This level of memory
|
||||
* tracking will have a significant impact on performance and should only
|
||||
* be enabled for debugging. This feature may be enabled by passing
|
||||
* --enable-debug-kmem-tracking to configure.
|
||||
* The following functions are only available for internal use.
|
||||
*/
|
||||
#define kmem_alloc(sz, fl) kmem_alloc_track((sz), (fl), \
|
||||
__FUNCTION__, __LINE__, 0, 0)
|
||||
#define kmem_zalloc(sz, fl) kmem_alloc_track((sz), (fl)|__GFP_ZERO,\
|
||||
__FUNCTION__, __LINE__, 0, 0)
|
||||
#define kmem_alloc_node(sz, fl, nd) kmem_alloc_track((sz), (fl), \
|
||||
__FUNCTION__, __LINE__, 1, nd)
|
||||
#define kmem_free(ptr, sz) kmem_free_track((ptr), (sz))
|
||||
extern void *spl_kmem_alloc_impl(size_t size, int flags, int node);
|
||||
extern void *spl_kmem_alloc_debug(size_t size, int flags, int node);
|
||||
extern void *spl_kmem_alloc_track(size_t size, int flags,
|
||||
const char *func, int line, int node);
|
||||
extern void spl_kmem_free_impl(const void *buf, size_t size);
|
||||
extern void spl_kmem_free_debug(const void *buf, size_t size);
|
||||
extern void spl_kmem_free_track(const void *buf, size_t size);
|
||||
|
||||
extern void *kmem_alloc_track(size_t, int, const char *, int, int, int);
|
||||
extern void kmem_free_track(const void *, size_t);
|
||||
|
||||
#else /* DEBUG_KMEM_TRACKING */
|
||||
/*
|
||||
* DEBUG_KMEM && !DEBUG_KMEM_TRACKING
|
||||
*
|
||||
* The default build will set DEBUG_KMEM. This provides basic memory
|
||||
* accounting with little to no impact on performance. When the module
|
||||
* is unloaded if any memory was leaked the total number of leaked bytes
|
||||
* will be reported on the console. To disable this basic accounting
|
||||
* pass the --disable-debug-kmem option to configure.
|
||||
*/
|
||||
#define kmem_alloc(sz, fl) kmem_alloc_debug((sz), (fl), \
|
||||
__FUNCTION__, __LINE__, 0, 0)
|
||||
#define kmem_zalloc(sz, fl) kmem_alloc_debug((sz), (fl)|__GFP_ZERO,\
|
||||
__FUNCTION__, __LINE__, 0, 0)
|
||||
#define kmem_alloc_node(sz, fl, nd) kmem_alloc_debug((sz), (fl), \
|
||||
__FUNCTION__, __LINE__, 1, nd)
|
||||
#define kmem_free(ptr, sz) kmem_free_debug((ptr), (sz))
|
||||
|
||||
extern void *kmem_alloc_debug(size_t, int, const char *, int, int, int);
|
||||
extern void kmem_free_debug(const void *, size_t);
|
||||
|
||||
#endif /* DEBUG_KMEM_TRACKING */
|
||||
#else /* DEBUG_KMEM */
|
||||
/*
|
||||
* !DEBUG_KMEM && !DEBUG_KMEM_TRACKING
|
||||
*
|
||||
* All debugging is disabled. There will be no overhead even for
|
||||
* minimal memory accounting. To enable basic accounting pass the
|
||||
* --enable-debug-kmem option to configure.
|
||||
*/
|
||||
#define kmem_alloc(sz, fl) kmalloc_nofail((sz), (fl))
|
||||
#define kmem_zalloc(sz, fl) kzalloc_nofail((sz), (fl))
|
||||
#define kmem_alloc_node(sz, fl, nd) kmalloc_node_nofail((sz), (fl), (nd))
|
||||
#define kmem_free(ptr, sz) ((void)(sz), kfree(ptr))
|
||||
|
||||
#endif /* DEBUG_KMEM */
|
||||
|
||||
int spl_kmem_init(void);
|
||||
void spl_kmem_fini(void);
|
||||
|
||||
#define kmem_virt(ptr) (((ptr) >= (void *)VMALLOC_START) && \
|
||||
((ptr) < (void *)VMALLOC_END))
|
||||
extern int spl_kmem_init(void);
|
||||
extern void spl_kmem_fini(void);
|
||||
|
||||
#endif /* _SPL_KMEM_H */
|
||||
|
@ -202,6 +202,7 @@ extern void spl_kmem_cache_set_move(spl_kmem_cache_t *,
|
||||
extern void spl_kmem_cache_destroy(spl_kmem_cache_t *skc);
|
||||
extern void *spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags);
|
||||
extern void spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj);
|
||||
extern void spl_kmem_cache_set_allocflags(spl_kmem_cache_t *skc, gfp_t flags);
|
||||
extern void spl_kmem_cache_reap_now(spl_kmem_cache_t *skc, int count);
|
||||
extern void spl_kmem_reap(void);
|
||||
|
||||
@ -214,29 +215,6 @@ extern void spl_kmem_reap(void);
|
||||
#define kmem_cache_reap_now(skc) \
|
||||
spl_kmem_cache_reap_now(skc, skc->skc_reap)
|
||||
#define kmem_reap() spl_kmem_reap()
|
||||
#define kmem_virt(ptr) \
|
||||
(((ptr) >= (void *)VMALLOC_START) && \
|
||||
((ptr) < (void *)VMALLOC_END))
|
||||
|
||||
/*
|
||||
* Allow custom slab allocation flags to be set for KMC_SLAB based caches.
|
||||
* One use for this function is to ensure the __GFP_COMP flag is part of
|
||||
* the default allocation mask which ensures higher order allocations are
|
||||
* properly refcounted. This flag was added to the default ->allocflags
|
||||
* as of Linux 3.11.
|
||||
*/
|
||||
static inline void
|
||||
kmem_cache_set_allocflags(spl_kmem_cache_t *skc, gfp_t flags)
|
||||
{
|
||||
if (skc->skc_linux_cache == NULL)
|
||||
return;
|
||||
|
||||
#if defined(HAVE_KMEM_CACHE_ALLOCFLAGS)
|
||||
skc->skc_linux_cache->allocflags |= flags;
|
||||
#elif defined(HAVE_KMEM_CACHE_GFPFLAGS)
|
||||
skc->skc_linux_cache->gfpflags |= flags;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* The following functions are only available for internal use.
|
||||
|
@ -47,135 +47,60 @@ extern size_t vmem_size(vmem_t *vmp, int typemask);
|
||||
#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
|
||||
#endif
|
||||
|
||||
static inline void *
|
||||
vmalloc_nofail(size_t size, gfp_t flags)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
/*
|
||||
* Retry failed __vmalloc() allocations once every second. The
|
||||
* rationale for the delay is that the likely failure modes are:
|
||||
*
|
||||
* 1) The system has completely exhausted memory, in which case
|
||||
* delaying 1 second for the memory reclaim to run is reasonable
|
||||
* to avoid thrashing the system.
|
||||
* 2) The system has memory but has exhausted the small virtual
|
||||
* address space available on 32-bit systems. Retrying the
|
||||
* allocation immediately will only result in spinning on the
|
||||
* virtual address space lock. It is better to delay a second and
|
||||
* hope that another process will free some of the address space.
|
||||
* But the bottom line is there is not much we can actually do
|
||||
* since we can never safely return a failure and honor the
|
||||
* Solaris semantics.
|
||||
*/
|
||||
while (1) {
|
||||
ptr = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
|
||||
if (unlikely((ptr == NULL) && (flags & __GFP_WAIT))) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule_timeout(HZ);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
|
||||
static inline void *
|
||||
vzalloc_nofail(size_t size, gfp_t flags)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
ptr = vmalloc_nofail(size, flags);
|
||||
if (ptr)
|
||||
memset(ptr, 0, (size));
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_KMEM
|
||||
|
||||
/*
|
||||
* Memory accounting functions to be used only when DEBUG_KMEM is set.
|
||||
*/
|
||||
#ifdef HAVE_ATOMIC64_T
|
||||
|
||||
#define vmem_alloc_used_add(size) atomic64_add(size, &vmem_alloc_used)
|
||||
#define vmem_alloc_used_sub(size) atomic64_sub(size, &vmem_alloc_used)
|
||||
#define vmem_alloc_used_read() atomic64_read(&vmem_alloc_used)
|
||||
#define vmem_alloc_used_set(size) atomic64_set(&vmem_alloc_used, size)
|
||||
|
||||
extern atomic64_t vmem_alloc_used;
|
||||
extern unsigned long long vmem_alloc_max;
|
||||
|
||||
#else /* HAVE_ATOMIC64_T */
|
||||
|
||||
#define vmem_alloc_used_add(size) atomic_add(size, &vmem_alloc_used)
|
||||
#define vmem_alloc_used_sub(size) atomic_sub(size, &vmem_alloc_used)
|
||||
#define vmem_alloc_used_read() atomic_read(&vmem_alloc_used)
|
||||
#define vmem_alloc_used_set(size) atomic_set(&vmem_alloc_used, size)
|
||||
|
||||
extern atomic_t vmem_alloc_used;
|
||||
extern unsigned long long vmem_alloc_max;
|
||||
|
||||
#endif /* HAVE_ATOMIC64_T */
|
||||
|
||||
#ifdef DEBUG_KMEM_TRACKING
|
||||
/*
|
||||
* DEBUG_KMEM && DEBUG_KMEM_TRACKING
|
||||
* vmem_* is an interface to a low level arena-based memory allocator on
|
||||
* Illumos that is used to allocate virtual address space. The kmem SLAB
|
||||
* allocator allocates slabs from it. Then the generic allocation functions
|
||||
* kmem_{alloc,zalloc,free}() are layered on top of SLAB allocators.
|
||||
*
|
||||
* The maximum level of memory debugging. All memory will be accounted
|
||||
* for and each allocation will be explicitly tracked. Any allocation
|
||||
* which is leaked will be reported on module unload and the exact location
|
||||
* where that memory was allocated will be reported. This level of memory
|
||||
* tracking will have a significant impact on performance and should only
|
||||
* be enabled for debugging. This feature may be enabled by passing
|
||||
* --enable-debug-kmem-tracking to configure.
|
||||
*/
|
||||
#define vmem_alloc(sz, fl) vmem_alloc_track((sz), (fl), \
|
||||
__FUNCTION__, __LINE__)
|
||||
#define vmem_zalloc(sz, fl) vmem_alloc_track((sz), (fl)|__GFP_ZERO,\
|
||||
__FUNCTION__, __LINE__)
|
||||
#define vmem_free(ptr, sz) vmem_free_track((ptr), (sz))
|
||||
|
||||
extern void *kmem_alloc_track(size_t, int, const char *, int, int, int);
|
||||
extern void kmem_free_track(const void *, size_t);
|
||||
extern void *vmem_alloc_track(size_t, int, const char *, int);
|
||||
extern void vmem_free_track(const void *, size_t);
|
||||
|
||||
#else /* DEBUG_KMEM_TRACKING */
|
||||
/*
|
||||
* DEBUG_KMEM && !DEBUG_KMEM_TRACKING
|
||||
* On Linux, the primary means of doing allocations is via kmalloc(), which
|
||||
* is similarly layered on top of something called the buddy allocator. The
|
||||
* buddy allocator is not available to kernel modules, it uses physical
|
||||
* memory addresses rather than virtual memory addresses and is prone to
|
||||
* fragmentation.
|
||||
*
|
||||
* The default build will set DEBUG_KMEM. This provides basic memory
|
||||
* accounting with little to no impact on performance. When the module
|
||||
* is unloaded if any memory was leaked the total number of leaked bytes
|
||||
* will be reported on the console. To disable this basic accounting
|
||||
* pass the --disable-debug-kmem option to configure.
|
||||
*/
|
||||
#define vmem_alloc(sz, fl) vmem_alloc_debug((sz), (fl), \
|
||||
__FUNCTION__, __LINE__)
|
||||
#define vmem_zalloc(sz, fl) vmem_alloc_debug((sz), (fl)|__GFP_ZERO,\
|
||||
__FUNCTION__, __LINE__)
|
||||
#define vmem_free(ptr, sz) vmem_free_debug((ptr), (sz))
|
||||
|
||||
extern void *vmem_alloc_debug(size_t, int, const char *, int);
|
||||
extern void vmem_free_debug(const void *, size_t);
|
||||
|
||||
#endif /* DEBUG_KMEM_TRACKING */
|
||||
#else /* DEBUG_KMEM */
|
||||
/*
|
||||
* !DEBUG_KMEM && !DEBUG_KMEM_TRACKING
|
||||
* Linux sets aside a relatively small address space for in-kernel virtual
|
||||
* memory from which allocations can be done using vmalloc(). It might seem
|
||||
* like a good idea to use vmalloc() to implement something similar to
|
||||
* Illumos' allocator. However, this has the following problems:
|
||||
*
|
||||
* All debugging is disabled. There will be no overhead even for
|
||||
* minimal memory accounting. To enable basic accounting pass the
|
||||
* --enable-debug-kmem option to configure.
|
||||
* 1. Page directory table allocations are hard coded to use GFP_KERNEL.
|
||||
* Consequently, any KM_PUSHPAGE or KM_NOSLEEP allocations done using
|
||||
* vmalloc() will not have proper semantics.
|
||||
*
|
||||
* 2. Address space exhaustion is a real issue on 32-bit platforms where
|
||||
* only a few 100MB are available. The kernel will handle it by spinning
|
||||
* when it runs out of address space.
|
||||
*
|
||||
* 3. All vmalloc() allocations and frees are protected by a single global
|
||||
* lock which serializes all allocations.
|
||||
*
|
||||
* 4. Accessing /proc/meminfo and /proc/vmallocinfo will iterate the entire
|
||||
* list. The former will sum the allocations while the latter will print
|
||||
* them to user space in a way that user space can keep the lock held
|
||||
* indefinitely. When the total number of mapped allocations is large
|
||||
* (several 100,000) a large amount of time will be spent waiting on locks.
|
||||
*
|
||||
* 5. Linux has a wait_on_bit() locking primitive that assumes physical
|
||||
* memory is used, it simply does not work on virtual memory. Certain
|
||||
* Linux structures (e.g. the superblock) use them and might be embedded
|
||||
* into a structure from Illumos. This makes using Linux virtual memory
|
||||
* unsafe in certain situations.
|
||||
*
|
||||
* It follows that we cannot obtain identical semantics to those on Illumos.
|
||||
* Consequently, we implement the kmem_{alloc,zalloc,free}() functions in
|
||||
* such a way that they can be used as drop-in replacements for small vmem_*
|
||||
* allocations (8MB in size or smaller) and map vmem_{alloc,zalloc,free}()
|
||||
* to them.
|
||||
*/
|
||||
#define vmem_alloc(sz, fl) vmalloc_nofail((sz), (fl))
|
||||
#define vmem_zalloc(sz, fl) vzalloc_nofail((sz), (fl))
|
||||
#define vmem_free(ptr, sz) ((void)(sz), vfree(ptr))
|
||||
|
||||
#endif /* DEBUG_KMEM */
|
||||
#define vmem_alloc(sz, fl) spl_vmem_alloc((sz), (fl), __func__, __LINE__)
|
||||
#define vmem_zalloc(sz, fl) spl_vmem_zalloc((sz), (fl), __func__, __LINE__)
|
||||
#define vmem_free(ptr, sz) spl_vmem_free((ptr), (sz))
|
||||
|
||||
extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line);
|
||||
extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line);
|
||||
extern void spl_vmem_free(const void *ptr, size_t sz);
|
||||
|
||||
int spl_vmem_init(void);
|
||||
void spl_vmem_fini(void);
|
||||
|
@ -80,6 +80,46 @@ By age (0x1) or low memory (0x2)
|
||||
Default value: \fB0\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBspl_kmem_alloc_warn\fR (uint)
|
||||
.ad
|
||||
.RS 12n
|
||||
As a general rule kmem_alloc() allocations should be small, preferably
|
||||
just a few pages since they must be physically contiguous. Therefore, a
|
||||
rate limited warning will be printed to the console for any kmem_alloc()
|
||||
which exceeds a reasonable threshold.
|
||||
|
||||
The default warning threshold is set to eight pages but capped at 32K to
|
||||
accommodate systems using large pages. This value was selected to be small
|
||||
enough to ensure the largest allocations are quickly noticed and fixed.
|
||||
But large enough to avoid logging any warnings when an allocation size is
|
||||
larger than optimal but not a serious concern. Since this value is tunable,
|
||||
developers are encouraged to set it lower when testing so any new largish
|
||||
allocations are quickly caught. These warnings may be disabled by setting
|
||||
the threshold to zero.
|
||||
.sp
|
||||
Default value: \fB32K\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBspl_kmem_alloc_max\fR (uint)
|
||||
.ad
|
||||
.RS 12n
|
||||
Large kmem_alloc() allocations will fail if they exceed KMALLOC_MAX_SIZE.
|
||||
Allocations which are marginally smaller than this limit may succeed but
|
||||
should still be avoided due to the expense of locating a contiguous range
|
||||
of free pages. Therefore, a maximum kmem size with reasonable safety
|
||||
margin of 4x is set. Kmem_alloc() allocations larger than this maximum
|
||||
will quickly fail. Vmem_alloc() allocations less than or equal to this
|
||||
value will use kmalloc(), but shift to vmalloc() when exceeding this value.
|
||||
.sp
|
||||
Default value: \fBKMALLOC_MAX_SIZE/4\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
|
@ -130,19 +130,6 @@ MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
|
||||
* One serious concern I do have about this method is the relatively
|
||||
* small virtual address space on 32bit arches. This will seriously
|
||||
* constrain the size of the slab caches and their performance.
|
||||
*
|
||||
* XXX: Improve the partial slab list by carefully maintaining a
|
||||
* strict ordering of fullest to emptiest slabs based on
|
||||
* the slab reference count. This guarantees that when freeing
|
||||
* slabs back to the system we need only linearly traverse the
|
||||
* last N slabs in the list to discover all the freeable slabs.
|
||||
*
|
||||
* XXX: NUMA awareness for optionally allocating memory close to a
|
||||
* particular core. This can be advantageous if you know the slab
|
||||
* object will be short lived and primarily accessed from one core.
|
||||
*
|
||||
* XXX: Slab coloring may also yield performance improvements and would
|
||||
* be desirable to implement.
|
||||
*/
|
||||
|
||||
struct list_head spl_kmem_cache_list; /* List of caches */
|
||||
@ -158,15 +145,15 @@ SPL_SHRINKER_DECLARE(spl_kmem_cache_shrinker,
|
||||
static void *
|
||||
kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
|
||||
{
|
||||
gfp_t lflags = kmem_flags_convert(flags);
|
||||
void *ptr;
|
||||
|
||||
ASSERT(ISP2(size));
|
||||
|
||||
if (skc->skc_flags & KMC_KMEM)
|
||||
ptr = (void *)__get_free_pages(flags | __GFP_COMP,
|
||||
get_order(size));
|
||||
ptr = (void *)__get_free_pages(lflags, get_order(size));
|
||||
else
|
||||
ptr = __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
|
||||
ptr = __vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL);
|
||||
|
||||
/* Resulting allocated memory will be page aligned */
|
||||
ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
|
||||
@ -361,12 +348,11 @@ spl_slab_free(spl_kmem_slab_t *sks,
|
||||
}
|
||||
|
||||
/*
|
||||
* Traverse all the partial slabs attached to a cache and free those
|
||||
* which are currently empty, and have not been touched for
|
||||
* skc_delay seconds to avoid thrashing. The count argument is
|
||||
* passed to optionally cap the number of slabs reclaimed, a count
|
||||
* of zero means try and reclaim everything. When flag is set we
|
||||
* always free an available slab regardless of age.
|
||||
* Traverse all the partial slabs attached to a cache and free those which
|
||||
* are currently empty, and have not been touched for skc_delay seconds to
|
||||
* avoid thrashing. The count argument is passed to optionally cap the
|
||||
* number of slabs reclaimed, a count of zero means try and reclaim
|
||||
* everything. When the flag is set, available slabs are freed regardless of age.
|
||||
*/
|
||||
static void
|
||||
spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag)
|
||||
@ -480,6 +466,7 @@ spl_emergency_insert(struct rb_root *root, spl_kmem_emergency_t *ske)
|
||||
static int
|
||||
spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
|
||||
{
|
||||
gfp_t lflags = kmem_flags_convert(flags);
|
||||
spl_kmem_emergency_t *ske;
|
||||
int empty;
|
||||
|
||||
@ -490,11 +477,11 @@ spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
|
||||
if (!empty)
|
||||
return (-EEXIST);
|
||||
|
||||
ske = kmalloc(sizeof (*ske), flags);
|
||||
ske = kmalloc(sizeof (*ske), lflags);
|
||||
if (ske == NULL)
|
||||
return (-ENOMEM);
|
||||
|
||||
ske->ske_obj = kmalloc(skc->skc_obj_size, flags);
|
||||
ske->ske_obj = kmalloc(skc->skc_obj_size, lflags);
|
||||
if (ske->ske_obj == NULL) {
|
||||
kfree(ske);
|
||||
return (-ENOMEM);
|
||||
@ -734,7 +721,7 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
|
||||
int size = sizeof (spl_kmem_magazine_t) +
|
||||
sizeof (void *) * skc->skc_mag_size;
|
||||
|
||||
skm = kmem_alloc_node(size, KM_SLEEP, cpu_to_node(cpu));
|
||||
skm = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
|
||||
if (skm) {
|
||||
skm->skm_magic = SKM_MAGIC;
|
||||
skm->skm_avail = 0;
|
||||
@ -754,13 +741,9 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
|
||||
static void
|
||||
spl_magazine_free(spl_kmem_magazine_t *skm)
|
||||
{
|
||||
int size = sizeof (spl_kmem_magazine_t) +
|
||||
sizeof (void *) * skm->skm_size;
|
||||
|
||||
ASSERT(skm->skm_magic == SKM_MAGIC);
|
||||
ASSERT(skm->skm_avail == 0);
|
||||
|
||||
kmem_free(skm, size);
|
||||
kfree(skm);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -835,6 +818,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
|
||||
spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, spl_kmem_reclaim_t reclaim,
|
||||
void *priv, void *vmp, int flags)
|
||||
{
|
||||
gfp_t lflags = kmem_flags_convert(KM_SLEEP);
|
||||
spl_kmem_cache_t *skc;
|
||||
int rc;
|
||||
|
||||
@ -852,18 +836,17 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
|
||||
* Allocate memory for a new cache and initialize it. Unfortunately,
|
||||
* this usually ends up being a large allocation of ~32k because
|
||||
* we need to allocate enough memory for the worst case number of
|
||||
* cpus in the magazine, skc_mag[NR_CPUS]. Because of this we
|
||||
* explicitly pass KM_NODEBUG to suppress the kmem warning
|
||||
* cpus in the magazine, skc_mag[NR_CPUS].
|
||||
*/
|
||||
skc = kmem_zalloc(sizeof (*skc), KM_SLEEP| KM_NODEBUG);
|
||||
skc = kzalloc(sizeof (*skc), lflags);
|
||||
if (skc == NULL)
|
||||
return (NULL);
|
||||
|
||||
skc->skc_magic = SKC_MAGIC;
|
||||
skc->skc_name_size = strlen(name) + 1;
|
||||
skc->skc_name = (char *)kmem_alloc(skc->skc_name_size, KM_SLEEP);
|
||||
skc->skc_name = (char *)kmalloc(skc->skc_name_size, lflags);
|
||||
if (skc->skc_name == NULL) {
|
||||
kmem_free(skc, sizeof (*skc));
|
||||
kfree(skc);
|
||||
return (NULL);
|
||||
}
|
||||
strncpy(skc->skc_name, name, skc->skc_name_size);
|
||||
@ -962,7 +945,11 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
|
||||
goto out;
|
||||
}
|
||||
|
||||
kmem_cache_set_allocflags(skc, __GFP_COMP);
|
||||
#if defined(HAVE_KMEM_CACHE_ALLOCFLAGS)
|
||||
skc->skc_linux_cache->allocflags |= __GFP_COMP;
|
||||
#elif defined(HAVE_KMEM_CACHE_GFPFLAGS)
|
||||
skc->skc_linux_cache->gfpflags |= __GFP_COMP;
|
||||
#endif
|
||||
skc->skc_flags |= KMC_NOMAGAZINE;
|
||||
}
|
||||
|
||||
@ -977,8 +964,8 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
|
||||
|
||||
return (skc);
|
||||
out:
|
||||
kmem_free(skc->skc_name, skc->skc_name_size);
|
||||
kmem_free(skc, sizeof (*skc));
|
||||
kfree(skc->skc_name);
|
||||
kfree(skc);
|
||||
return (NULL);
|
||||
}
|
||||
EXPORT_SYMBOL(spl_kmem_cache_create);
|
||||
@ -1048,10 +1035,10 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
|
||||
ASSERT3U(skc->skc_obj_emergency, ==, 0);
|
||||
ASSERT(list_empty(&skc->skc_complete_list));
|
||||
|
||||
kmem_free(skc->skc_name, skc->skc_name_size);
|
||||
spin_unlock(&skc->skc_lock);
|
||||
|
||||
kmem_free(skc, sizeof (*skc));
|
||||
kfree(skc->skc_name);
|
||||
kfree(skc);
|
||||
}
|
||||
EXPORT_SYMBOL(spl_kmem_cache_destroy);
|
||||
|
||||
@ -1106,7 +1093,13 @@ spl_cache_grow_work(void *data)
|
||||
spl_kmem_cache_t *skc = ska->ska_cache;
|
||||
spl_kmem_slab_t *sks;
|
||||
|
||||
sks = spl_slab_alloc(skc, ska->ska_flags | __GFP_NORETRY | KM_NODEBUG);
|
||||
#if defined(PF_MEMALLOC_NOIO)
|
||||
unsigned noio_flag = memalloc_noio_save();
|
||||
sks = spl_slab_alloc(skc, ska->ska_flags);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
#else
|
||||
sks = spl_slab_alloc(skc, ska->ska_flags);
|
||||
#endif
|
||||
spin_lock(&skc->skc_lock);
|
||||
if (sks) {
|
||||
skc->skc_slab_total++;
|
||||
@ -1140,8 +1133,9 @@ spl_cache_grow_wait(spl_kmem_cache_t *skc)
|
||||
static int
|
||||
spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
|
||||
{
|
||||
int remaining, rc;
|
||||
int remaining, rc = 0;
|
||||
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
ASSERT(skc->skc_magic == SKC_MAGIC);
|
||||
ASSERT((skc->skc_flags & KMC_SLAB) == 0);
|
||||
might_sleep();
|
||||
@ -1166,7 +1160,7 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
|
||||
if (test_and_set_bit(KMC_BIT_GROWING, &skc->skc_flags) == 0) {
|
||||
spl_kmem_alloc_t *ska;
|
||||
|
||||
ska = kmalloc(sizeof (*ska), flags);
|
||||
ska = kmalloc(sizeof (*ska), kmem_flags_convert(flags));
|
||||
if (ska == NULL) {
|
||||
clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
|
||||
wake_up_all(&skc->skc_waitq);
|
||||
@ -1175,7 +1169,7 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
|
||||
|
||||
atomic_inc(&skc->skc_ref);
|
||||
ska->ska_cache = skc;
|
||||
ska->ska_flags = flags & ~__GFP_FS;
|
||||
ska->ska_flags = flags;
|
||||
taskq_init_ent(&ska->ska_tqe);
|
||||
taskq_dispatch_ent(spl_kmem_cache_taskq,
|
||||
spl_cache_grow_work, ska, 0, &ska->ska_tqe);
|
||||
@ -1347,9 +1341,9 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
|
||||
spl_kmem_magazine_t *skm;
|
||||
void *obj = NULL;
|
||||
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
ASSERT(skc->skc_magic == SKC_MAGIC);
|
||||
ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
|
||||
ASSERT(flags & KM_SLEEP);
|
||||
|
||||
atomic_inc(&skc->skc_ref);
|
||||
|
||||
@ -1360,9 +1354,8 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
|
||||
*/
|
||||
if (skc->skc_flags & KMC_SLAB) {
|
||||
struct kmem_cache *slc = skc->skc_linux_cache;
|
||||
|
||||
do {
|
||||
obj = kmem_cache_alloc(slc, flags | __GFP_COMP);
|
||||
obj = kmem_cache_alloc(slc, kmem_flags_convert(flags));
|
||||
} while ((obj == NULL) && !(flags & KM_NOSLEEP));
|
||||
|
||||
goto ret;
|
||||
@ -1445,7 +1438,7 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
|
||||
* are guaranteed to have physical addresses. They must be removed
|
||||
* from the tree of emergency objects and then freed.
|
||||
*/
|
||||
if ((skc->skc_flags & KMC_VMEM) && !kmem_virt(obj)) {
|
||||
if ((skc->skc_flags & KMC_VMEM) && !is_vmalloc_addr(obj)) {
|
||||
spl_emergency_free(skc, obj);
|
||||
goto out;
|
||||
}
|
||||
|
@ -23,8 +23,47 @@
|
||||
*/
|
||||
|
||||
#include <sys/debug.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/vmem.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/ratelimit.h>
|
||||
|
||||
/*
|
||||
* As a general rule kmem_alloc() allocations should be small, preferably
|
||||
* just a few pages since they must by physically contiguous. Therefore, a
|
||||
* rate limited warning will be printed to the console for any kmem_alloc()
|
||||
* which exceeds a reasonable threshold.
|
||||
*
|
||||
* The default warning threshold is set to eight pages but capped at 32K to
|
||||
* accommodate systems using large pages. This value was selected to be small
|
||||
* enough to ensure the largest allocations are quickly noticed and fixed.
|
||||
* But large enough to avoid logging any warnings when a allocation size is
|
||||
* larger than optimal but not a serious concern. Since this value is tunable,
|
||||
* developers are encouraged to set it lower when testing so any new largish
|
||||
* allocations are quickly caught. These warnings may be disabled by setting
|
||||
* the threshold to zero.
|
||||
*/
|
||||
unsigned int spl_kmem_alloc_warn = MAX(8 * PAGE_SIZE, 32 * 1024);
|
||||
module_param(spl_kmem_alloc_warn, uint, 0644);
|
||||
MODULE_PARM_DESC(spl_kmem_alloc_warn,
|
||||
"Warning threshold in bytes for a kmem_alloc()");
|
||||
EXPORT_SYMBOL(spl_kmem_alloc_warn);
|
||||
|
||||
/*
|
||||
* Large kmem_alloc() allocations will fail if they exceed KMALLOC_MAX_SIZE.
|
||||
* Allocations which are marginally smaller than this limit may succeed but
|
||||
* should still be avoided due to the expense of locating a contiguous range
|
||||
* of free pages. Therefore, a maximum kmem size with a reasonable safety
|
||||
* margin of 4x is set. Kmem_alloc() allocations larger than this maximum
|
||||
* will quickly fail. Vmem_alloc() allocations less than or equal to this
|
||||
* value will use kmalloc(), but shift to vmalloc() when exceeding this value.
|
||||
*/
|
||||
unsigned int spl_kmem_alloc_max = (KMALLOC_MAX_SIZE >> 2);
|
||||
module_param(spl_kmem_alloc_max, uint, 0644);
|
||||
MODULE_PARM_DESC(spl_kmem_alloc_max,
|
||||
"Maximum size in bytes for a kmem_alloc()");
|
||||
EXPORT_SYMBOL(spl_kmem_alloc_max);
|
||||
|
||||
int
|
||||
kmem_debugging(void)
|
||||
@ -72,7 +111,7 @@ __strdup(const char *str, int flags)
|
||||
int n;
|
||||
|
||||
n = strlen(str);
|
||||
ptr = kmalloc_nofail(n + 1, flags);
|
||||
ptr = kmalloc(n + 1, kmem_flags_convert(flags));
|
||||
if (ptr)
|
||||
memcpy(ptr, str, n + 1);
|
||||
|
||||
@ -94,10 +133,101 @@ strfree(char *str)
|
||||
EXPORT_SYMBOL(strfree);
|
||||
|
||||
/*
|
||||
* Memory allocation interfaces and debugging for basic kmem_*
|
||||
* and vmem_* style memory allocation. When DEBUG_KMEM is enabled
|
||||
* the SPL will keep track of the total memory allocated, and
|
||||
* report any memory leaked when the module is unloaded.
|
||||
* Limit the number of large allocation stack traces dumped to not more than
|
||||
* 5 every 60 seconds to prevent denial-of-service attacks from debug code.
|
||||
*/
|
||||
DEFINE_RATELIMIT_STATE(kmem_alloc_ratelimit_state, 60 * HZ, 5);
|
||||
|
||||
/*
|
||||
* General purpose unified implementation of kmem_alloc(). It is an
|
||||
* amalgamation of Linux and Illumos allocator design. It should never be
|
||||
* exported to ensure that code using kmem_alloc()/kmem_zalloc() remains
|
||||
* relatively portable. Consumers may only access this function through
|
||||
* wrappers that enforce the common flags to ensure portability.
|
||||
*/
|
||||
inline void *
|
||||
spl_kmem_alloc_impl(size_t size, int flags, int node)
|
||||
{
|
||||
gfp_t lflags = kmem_flags_convert(flags);
|
||||
void *ptr;
|
||||
|
||||
/*
|
||||
* Log abnormally large allocations and rate limit the console output.
|
||||
* Allocations larger than spl_kmem_alloc_warn should be performed
|
||||
* through the vmem_alloc()/vmem_zalloc() interfaces.
|
||||
*/
|
||||
if ((spl_kmem_alloc_warn > 0) && (size > spl_kmem_alloc_warn) &&
|
||||
!(flags & KM_VMEM) && __ratelimit(&kmem_alloc_ratelimit_state)) {
|
||||
printk(KERN_WARNING
|
||||
"Large kmem_alloc(%lu, 0x%x), please file an issue at:\n"
|
||||
"https://github.com/zfsonlinux/zfs/issues/new\n",
|
||||
(unsigned long)size, flags);
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
/*
|
||||
* Use a loop because kmalloc_node() can fail when GFP_KERNEL is used
|
||||
* unlike kmem_alloc() with KM_SLEEP on Illumos.
|
||||
*/
|
||||
do {
|
||||
/*
|
||||
* Calling kmalloc_node() when the size >= spl_kmem_alloc_max
|
||||
* is unsafe. This must fail for all for kmem_alloc() and
|
||||
* kmem_zalloc() callers.
|
||||
*
|
||||
* For vmem_alloc() and vmem_zalloc() callers it is permissible
|
||||
* to use __vmalloc(). However, in general use of __vmalloc()
|
||||
* is strongly discouraged because a global lock must be
|
||||
* acquired. Contention on this lock can significantly
|
||||
* impact performance so frequently manipulating the virtual
|
||||
* address space is strongly discouraged.
|
||||
*/
|
||||
if (unlikely(size > spl_kmem_alloc_max)) {
|
||||
if (flags & KM_VMEM) {
|
||||
ptr = __vmalloc(size, lflags, PAGE_KERNEL);
|
||||
} else {
|
||||
return (NULL);
|
||||
}
|
||||
} else {
|
||||
ptr = kmalloc_node(size, lflags, node);
|
||||
}
|
||||
|
||||
if (likely(ptr) || (flags & KM_NOSLEEP))
|
||||
return (ptr);
|
||||
|
||||
if (unlikely(__ratelimit(&kmem_alloc_ratelimit_state))) {
|
||||
printk(KERN_WARNING
|
||||
"Possible memory allocation deadlock: "
|
||||
"size=%lu lflags=0x%x",
|
||||
(unsigned long)size, lflags);
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
/*
|
||||
* Use cond_resched() instead of congestion_wait() to avoid
|
||||
* deadlocking systems where there are no block devices.
|
||||
*/
|
||||
cond_resched();
|
||||
} while (1);
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
inline void
|
||||
spl_kmem_free_impl(const void *buf, size_t size)
|
||||
{
|
||||
if (is_vmalloc_addr(buf))
|
||||
vfree(buf);
|
||||
else
|
||||
kfree(buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Memory allocation and accounting for kmem_* style allocations. When
|
||||
* DEBUG_KMEM is enabled the total memory allocated will be tracked and
|
||||
* any memory leaked will be reported during module unload.
|
||||
*
|
||||
* ./configure --enable-debug-kmem
|
||||
*/
|
||||
#ifdef DEBUG_KMEM
|
||||
|
||||
@ -113,6 +243,28 @@ unsigned long long kmem_alloc_max = 0;
|
||||
EXPORT_SYMBOL(kmem_alloc_used);
|
||||
EXPORT_SYMBOL(kmem_alloc_max);
|
||||
|
||||
inline void *
|
||||
spl_kmem_alloc_debug(size_t size, int flags, int node)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
ptr = spl_kmem_alloc_impl(size, flags, node);
|
||||
if (ptr) {
|
||||
kmem_alloc_used_add(size);
|
||||
if (unlikely(kmem_alloc_used_read() > kmem_alloc_max))
|
||||
kmem_alloc_max = kmem_alloc_used_read();
|
||||
}
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
|
||||
inline void
|
||||
spl_kmem_free_debug(const void *ptr, size_t size)
|
||||
{
|
||||
kmem_alloc_used_sub(size);
|
||||
spl_kmem_free_impl(ptr, size);
|
||||
}
|
||||
|
||||
/*
|
||||
* When DEBUG_KMEM_TRACKING is enabled not only will total bytes be tracked
|
||||
* but also the location of every alloc and free. When the SPL module is
|
||||
@ -124,9 +276,14 @@ EXPORT_SYMBOL(kmem_alloc_max);
|
||||
* contended particularly on xfree(). If we want to run with this detailed
|
||||
* debugging enabled for anything other than debugging we need to minimize
|
||||
* the contention by moving to a lock per xmem_table entry model.
|
||||
*
|
||||
* ./configure --enable-debug-kmem-tracking
|
||||
*/
|
||||
#ifdef DEBUG_KMEM_TRACKING
|
||||
|
||||
#include <linux/hash.h>
|
||||
#include <linux/ctype.h>
|
||||
|
||||
#define KMEM_HASH_BITS 10
|
||||
#define KMEM_TABLE_SIZE (1 << KMEM_HASH_BITS)
|
||||
|
||||
@ -139,13 +296,9 @@ typedef struct kmem_debug {
|
||||
int kd_line; /* Allocation line */
|
||||
} kmem_debug_t;
|
||||
|
||||
spinlock_t kmem_lock;
|
||||
struct hlist_head kmem_table[KMEM_TABLE_SIZE];
|
||||
struct list_head kmem_list;
|
||||
|
||||
EXPORT_SYMBOL(kmem_lock);
|
||||
EXPORT_SYMBOL(kmem_table);
|
||||
EXPORT_SYMBOL(kmem_list);
|
||||
static spinlock_t kmem_lock;
|
||||
static struct hlist_head kmem_table[KMEM_TABLE_SIZE];
|
||||
static struct list_head kmem_list;
|
||||
|
||||
static kmem_debug_t *
|
||||
kmem_del_init(spinlock_t *lock, struct hlist_head *table,
|
||||
@ -174,177 +327,113 @@ kmem_del_init(spinlock_t *lock, struct hlist_head *table,
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void *
|
||||
kmem_alloc_track(size_t size, int flags, const char *func, int line,
|
||||
int node_alloc, int node)
|
||||
inline void *
|
||||
spl_kmem_alloc_track(size_t size, int flags,
|
||||
const char *func, int line, int node)
|
||||
{
|
||||
void *ptr = NULL;
|
||||
kmem_debug_t *dptr;
|
||||
unsigned long irq_flags;
|
||||
|
||||
/* Function may be called with KM_NOSLEEP so failure is possible */
|
||||
dptr = (kmem_debug_t *) kmalloc_nofail(sizeof (kmem_debug_t),
|
||||
flags & ~__GFP_ZERO);
|
||||
dptr = kmalloc(sizeof (kmem_debug_t), kmem_flags_convert(flags));
|
||||
if (dptr == NULL)
|
||||
return (NULL);
|
||||
|
||||
if (unlikely(dptr == NULL)) {
|
||||
printk(KERN_WARNING "debug kmem_alloc(%ld, 0x%x) at %s:%d "
|
||||
"failed (%lld/%llu)\n", sizeof (kmem_debug_t), flags,
|
||||
func, line, kmem_alloc_used_read(), kmem_alloc_max);
|
||||
} else {
|
||||
/*
|
||||
* Marked unlikely because we should never be doing this,
|
||||
* we tolerate to up 2 pages but a single page is best.
|
||||
*/
|
||||
if (unlikely((size > PAGE_SIZE*2) && !(flags & KM_NODEBUG))) {
|
||||
printk(KERN_WARNING "large kmem_alloc(%llu, 0x%x) "
|
||||
"at %s:%d failed (%lld/%llu)\n",
|
||||
(unsigned long long)size, flags, func, line,
|
||||
kmem_alloc_used_read(), kmem_alloc_max);
|
||||
spl_dumpstack();
|
||||
}
|
||||
|
||||
/*
|
||||
* We use __strdup() below because the string pointed to by
|
||||
* __FUNCTION__ might not be available by the time we want
|
||||
* to print it since the module might have been unloaded.
|
||||
* This can only fail in the KM_NOSLEEP case.
|
||||
*/
|
||||
dptr->kd_func = __strdup(func, flags & ~__GFP_ZERO);
|
||||
if (unlikely(dptr->kd_func == NULL)) {
|
||||
kfree(dptr);
|
||||
printk(KERN_WARNING "debug __strdup() at %s:%d "
|
||||
"failed (%lld/%llu)\n", func, line,
|
||||
kmem_alloc_used_read(), kmem_alloc_max);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Use the correct allocator */
|
||||
if (node_alloc) {
|
||||
ASSERT(!(flags & __GFP_ZERO));
|
||||
ptr = kmalloc_node_nofail(size, flags, node);
|
||||
} else if (flags & __GFP_ZERO) {
|
||||
ptr = kzalloc_nofail(size, flags & ~__GFP_ZERO);
|
||||
} else {
|
||||
ptr = kmalloc_nofail(size, flags);
|
||||
}
|
||||
|
||||
if (unlikely(ptr == NULL)) {
|
||||
kfree(dptr->kd_func);
|
||||
kfree(dptr);
|
||||
printk(KERN_WARNING "kmem_alloc(%llu, 0x%x) "
|
||||
"at %s:%d failed (%lld/%llu)\n",
|
||||
(unsigned long long) size, flags, func, line,
|
||||
kmem_alloc_used_read(), kmem_alloc_max);
|
||||
goto out;
|
||||
}
|
||||
|
||||
kmem_alloc_used_add(size);
|
||||
if (unlikely(kmem_alloc_used_read() > kmem_alloc_max))
|
||||
kmem_alloc_max = kmem_alloc_used_read();
|
||||
|
||||
INIT_HLIST_NODE(&dptr->kd_hlist);
|
||||
INIT_LIST_HEAD(&dptr->kd_list);
|
||||
|
||||
dptr->kd_addr = ptr;
|
||||
dptr->kd_size = size;
|
||||
dptr->kd_line = line;
|
||||
|
||||
spin_lock_irqsave(&kmem_lock, irq_flags);
|
||||
hlist_add_head(&dptr->kd_hlist,
|
||||
&kmem_table[hash_ptr(ptr, KMEM_HASH_BITS)]);
|
||||
list_add_tail(&dptr->kd_list, &kmem_list);
|
||||
spin_unlock_irqrestore(&kmem_lock, irq_flags);
|
||||
dptr->kd_func = __strdup(func, flags);
|
||||
if (dptr->kd_func == NULL) {
|
||||
kfree(dptr);
|
||||
return (NULL);
|
||||
}
|
||||
out:
|
||||
|
||||
ptr = spl_kmem_alloc_debug(size, flags, node);
|
||||
if (ptr == NULL) {
|
||||
kfree(dptr->kd_func);
|
||||
kfree(dptr);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
INIT_HLIST_NODE(&dptr->kd_hlist);
|
||||
INIT_LIST_HEAD(&dptr->kd_list);
|
||||
|
||||
dptr->kd_addr = ptr;
|
||||
dptr->kd_size = size;
|
||||
dptr->kd_line = line;
|
||||
|
||||
spin_lock_irqsave(&kmem_lock, irq_flags);
|
||||
hlist_add_head(&dptr->kd_hlist,
|
||||
&kmem_table[hash_ptr(ptr, KMEM_HASH_BITS)]);
|
||||
list_add_tail(&dptr->kd_list, &kmem_list);
|
||||
spin_unlock_irqrestore(&kmem_lock, irq_flags);
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_alloc_track);
|
||||
|
||||
void
|
||||
kmem_free_track(const void *ptr, size_t size)
|
||||
inline void
|
||||
spl_kmem_free_track(const void *ptr, size_t size)
|
||||
{
|
||||
kmem_debug_t *dptr;
|
||||
|
||||
ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr,
|
||||
(unsigned long long) size);
|
||||
|
||||
/* Must exist in hash due to kmem_alloc() */
|
||||
dptr = kmem_del_init(&kmem_lock, kmem_table, KMEM_HASH_BITS, ptr);
|
||||
ASSERT(dptr);
|
||||
ASSERT3P(dptr, !=, NULL);
|
||||
ASSERT3S(dptr->kd_size, ==, size);
|
||||
|
||||
/* Size must match */
|
||||
ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), "
|
||||
"kd_func = %s, kd_line = %d\n", (unsigned long long) dptr->kd_size,
|
||||
(unsigned long long) size, dptr->kd_func, dptr->kd_line);
|
||||
|
||||
kmem_alloc_used_sub(size);
|
||||
kfree(dptr->kd_func);
|
||||
|
||||
memset((void *)dptr, 0x5a, sizeof (kmem_debug_t));
|
||||
kfree(dptr);
|
||||
|
||||
memset((void *)ptr, 0x5a, size);
|
||||
kfree(ptr);
|
||||
spl_kmem_free_debug(ptr, size);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_free_track);
|
||||
|
||||
#else /* DEBUG_KMEM_TRACKING */
|
||||
|
||||
void *
|
||||
kmem_alloc_debug(size_t size, int flags, const char *func, int line,
|
||||
int node_alloc, int node)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
/*
|
||||
* Marked unlikely because we should never be doing this,
|
||||
* we tolerate to up 2 pages but a single page is best.
|
||||
*/
|
||||
if (unlikely((size > PAGE_SIZE * 2) && !(flags & KM_NODEBUG))) {
|
||||
printk(KERN_WARNING
|
||||
"large kmem_alloc(%llu, 0x%x) at %s:%d (%lld/%llu)\n",
|
||||
(unsigned long long)size, flags, func, line,
|
||||
(unsigned long long)kmem_alloc_used_read(), kmem_alloc_max);
|
||||
spl_dumpstack();
|
||||
}
|
||||
|
||||
/* Use the correct allocator */
|
||||
if (node_alloc) {
|
||||
ASSERT(!(flags & __GFP_ZERO));
|
||||
ptr = kmalloc_node_nofail(size, flags, node);
|
||||
} else if (flags & __GFP_ZERO) {
|
||||
ptr = kzalloc_nofail(size, flags & (~__GFP_ZERO));
|
||||
} else {
|
||||
ptr = kmalloc_nofail(size, flags);
|
||||
}
|
||||
|
||||
if (unlikely(ptr == NULL)) {
|
||||
printk(KERN_WARNING
|
||||
"kmem_alloc(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n",
|
||||
(unsigned long long)size, flags, func, line,
|
||||
(unsigned long long)kmem_alloc_used_read(), kmem_alloc_max);
|
||||
} else {
|
||||
kmem_alloc_used_add(size);
|
||||
if (unlikely(kmem_alloc_used_read() > kmem_alloc_max))
|
||||
kmem_alloc_max = kmem_alloc_used_read();
|
||||
}
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_alloc_debug);
|
||||
|
||||
void
|
||||
kmem_free_debug(const void *ptr, size_t size)
|
||||
{
|
||||
ASSERT(ptr || size > 0);
|
||||
kmem_alloc_used_sub(size);
|
||||
kfree(ptr);
|
||||
}
|
||||
EXPORT_SYMBOL(kmem_free_debug);
|
||||
|
||||
#endif /* DEBUG_KMEM_TRACKING */
|
||||
#endif /* DEBUG_KMEM */
|
||||
|
||||
/*
|
||||
* Public kmem_alloc(), kmem_zalloc() and kmem_free() interfaces.
|
||||
*/
|
||||
void *
|
||||
spl_kmem_alloc(size_t size, int flags, const char *func, int line)
|
||||
{
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
|
||||
#else
|
||||
return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(spl_kmem_alloc);
|
||||
|
||||
void *
|
||||
spl_kmem_zalloc(size_t size, int flags, const char *func, int line)
|
||||
{
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
|
||||
flags |= KM_ZERO;
|
||||
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
|
||||
#else
|
||||
return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(spl_kmem_zalloc);
|
||||
|
||||
void
|
||||
spl_kmem_free(const void *buf, size_t size)
|
||||
{
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_free_impl(buf, size));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_free_debug(buf, size));
|
||||
#else
|
||||
return (spl_kmem_free_track(buf, size));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(spl_kmem_free);
|
||||
|
||||
#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
|
||||
static char *
|
||||
spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
|
||||
@ -424,22 +513,20 @@ spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
|
||||
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
#else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
|
||||
#define spl_kmem_init_tracking(list, lock, size)
|
||||
#define spl_kmem_fini_tracking(list, lock)
|
||||
#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
|
||||
|
||||
int
|
||||
spl_kmem_init(void)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
#ifdef DEBUG_KMEM
|
||||
kmem_alloc_used_set(0);
|
||||
spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE);
|
||||
#endif
|
||||
|
||||
return (rc);
|
||||
#ifdef DEBUG_KMEM_TRACKING
|
||||
spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE);
|
||||
#endif /* DEBUG_KMEM_TRACKING */
|
||||
#endif /* DEBUG_KMEM */
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
@ -454,8 +541,10 @@ spl_kmem_fini(void)
|
||||
*/
|
||||
if (kmem_alloc_used_read() != 0)
|
||||
printk(KERN_WARNING "kmem leaked %ld/%llu bytes\n",
|
||||
kmem_alloc_used_read(), kmem_alloc_max);
|
||||
(unsigned long)kmem_alloc_used_read(), kmem_alloc_max);
|
||||
|
||||
#ifdef DEBUG_KMEM_TRACKING
|
||||
spl_kmem_fini_tracking(&kmem_list, &kmem_lock);
|
||||
#endif /* DEBUG_KMEM_TRACKING */
|
||||
#endif /* DEBUG_KMEM */
|
||||
}
|
||||
|
@ -353,26 +353,6 @@ static struct ctl_table spl_kmem_table[] = {
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
},
|
||||
{
|
||||
.procname = "vmem_used",
|
||||
.data = &vmem_alloc_used,
|
||||
# ifdef HAVE_ATOMIC64_T
|
||||
.maxlen = sizeof(atomic64_t),
|
||||
# else
|
||||
.maxlen = sizeof(atomic_t),
|
||||
# endif /* HAVE_ATOMIC64_T */
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_domemused,
|
||||
},
|
||||
{
|
||||
.procname = "vmem_max",
|
||||
.data = &vmem_alloc_max,
|
||||
.maxlen = sizeof(unsigned long),
|
||||
.extra1 = &table_min,
|
||||
.extra2 = &table_max,
|
||||
.mode = 0444,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
},
|
||||
{
|
||||
.procname = "slab_kmem_total",
|
||||
.data = (void *)(KMC_KMEM | KMC_TOTAL),
|
||||
|
@ -337,8 +337,7 @@ tsd_hash_table_init(uint_t bits)
|
||||
if (table == NULL)
|
||||
return (NULL);
|
||||
|
||||
table->ht_bins = kmem_zalloc(sizeof(tsd_hash_bin_t) * size,
|
||||
KM_SLEEP | KM_NODEBUG);
|
||||
table->ht_bins = kmem_zalloc(sizeof(tsd_hash_bin_t) * size, KM_SLEEP);
|
||||
if (table->ht_bins == NULL) {
|
||||
kmem_free(table, sizeof(tsd_hash_table_t));
|
||||
return (NULL);
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#include <sys/debug.h>
|
||||
#include <sys/vmem.h>
|
||||
#include <linux/mm_compat.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
vmem_t *heap_arena = NULL;
|
||||
@ -47,314 +48,62 @@ vmem_size(vmem_t *vmp, int typemask)
|
||||
EXPORT_SYMBOL(vmem_size);
|
||||
|
||||
/*
|
||||
* Memory allocation interfaces and debugging for basic kmem_*
|
||||
* and vmem_* style memory allocation. When DEBUG_KMEM is enabled
|
||||
* the SPL will keep track of the total memory allocated, and
|
||||
* report any memory leaked when the module is unloaded.
|
||||
* Public vmem_alloc(), vmem_zalloc() and vmem_free() interfaces.
|
||||
*/
|
||||
#ifdef DEBUG_KMEM
|
||||
void *
|
||||
spl_vmem_alloc(size_t size, int flags, const char *func, int line)
|
||||
{
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
|
||||
/* Shim layer memory accounting */
|
||||
#ifdef HAVE_ATOMIC64_T
|
||||
atomic64_t vmem_alloc_used = ATOMIC64_INIT(0);
|
||||
unsigned long long vmem_alloc_max = 0;
|
||||
#else /* HAVE_ATOMIC64_T */
|
||||
atomic_t vmem_alloc_used = ATOMIC_INIT(0);
|
||||
unsigned long long vmem_alloc_max = 0;
|
||||
#endif /* HAVE_ATOMIC64_T */
|
||||
flags |= KM_VMEM;
|
||||
|
||||
EXPORT_SYMBOL(vmem_alloc_used);
|
||||
EXPORT_SYMBOL(vmem_alloc_max);
|
||||
|
||||
/*
|
||||
* When DEBUG_KMEM_TRACKING is enabled not only will total bytes be tracked
|
||||
* but also the location of every alloc and free. When the SPL module is
|
||||
* unloaded a list of all leaked addresses and where they were allocated
|
||||
* will be dumped to the console. Enabling this feature has a significant
|
||||
* impact on performance but it makes finding memory leaks straight forward.
|
||||
*
|
||||
* Not surprisingly with debugging enabled the xmem_locks are very highly
|
||||
* contended particularly on xfree(). If we want to run with this detailed
|
||||
* debugging enabled for anything other than debugging we need to minimize
|
||||
* the contention by moving to a lock per xmem_table entry model.
|
||||
*/
|
||||
#ifdef DEBUG_KMEM_TRACKING
|
||||
|
||||
#define VMEM_HASH_BITS 10
|
||||
#define VMEM_TABLE_SIZE (1 << VMEM_HASH_BITS)
|
||||
|
||||
typedef struct kmem_debug {
|
||||
struct hlist_node kd_hlist; /* Hash node linkage */
|
||||
struct list_head kd_list; /* List of all allocations */
|
||||
void *kd_addr; /* Allocation pointer */
|
||||
size_t kd_size; /* Allocation size */
|
||||
const char *kd_func; /* Allocation function */
|
||||
int kd_line; /* Allocation line */
|
||||
} kmem_debug_t;
|
||||
|
||||
spinlock_t vmem_lock;
|
||||
struct hlist_head vmem_table[VMEM_TABLE_SIZE];
|
||||
struct list_head vmem_list;
|
||||
|
||||
EXPORT_SYMBOL(vmem_lock);
|
||||
EXPORT_SYMBOL(vmem_table);
|
||||
EXPORT_SYMBOL(vmem_list);
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
|
||||
#else
|
||||
return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(spl_vmem_alloc);
|
||||
|
||||
void *
|
||||
vmem_alloc_track(size_t size, int flags, const char *func, int line)
|
||||
spl_vmem_zalloc(size_t size, int flags, const char *func, int line)
|
||||
{
|
||||
void *ptr = NULL;
|
||||
kmem_debug_t *dptr;
|
||||
unsigned long irq_flags;
|
||||
ASSERT0(flags & ~KM_PUBLIC_MASK);
|
||||
|
||||
ASSERT(flags & KM_SLEEP);
|
||||
flags |= (KM_VMEM | KM_ZERO);
|
||||
|
||||
/* Function may be called with KM_NOSLEEP so failure is possible */
|
||||
dptr = (kmem_debug_t *) kmalloc_nofail(sizeof (kmem_debug_t),
|
||||
flags & ~__GFP_ZERO);
|
||||
if (unlikely(dptr == NULL)) {
|
||||
printk(KERN_WARNING "debug vmem_alloc(%ld, 0x%x) "
|
||||
"at %s:%d failed (%lld/%llu)\n",
|
||||
sizeof (kmem_debug_t), flags, func, line,
|
||||
vmem_alloc_used_read(), vmem_alloc_max);
|
||||
} else {
|
||||
/*
|
||||
* We use __strdup() below because the string pointed to by
|
||||
* __FUNCTION__ might not be available by the time we want
|
||||
* to print it, since the module might have been unloaded.
|
||||
* This can never fail because we have already asserted
|
||||
* that flags is KM_SLEEP.
|
||||
*/
|
||||
dptr->kd_func = __strdup(func, flags & ~__GFP_ZERO);
|
||||
if (unlikely(dptr->kd_func == NULL)) {
|
||||
kfree(dptr);
|
||||
printk(KERN_WARNING "debug __strdup() at %s:%d "
|
||||
"failed (%lld/%llu)\n", func, line,
|
||||
vmem_alloc_used_read(), vmem_alloc_max);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Use the correct allocator */
|
||||
if (flags & __GFP_ZERO) {
|
||||
ptr = vzalloc_nofail(size, flags & ~__GFP_ZERO);
|
||||
} else {
|
||||
ptr = vmalloc_nofail(size, flags);
|
||||
}
|
||||
|
||||
if (unlikely(ptr == NULL)) {
|
||||
kfree(dptr->kd_func);
|
||||
kfree(dptr);
|
||||
printk(KERN_WARNING "vmem_alloc (%llu, 0x%x) "
|
||||
"at %s:%d failed (%lld/%llu)\n",
|
||||
(unsigned long long) size, flags, func, line,
|
||||
vmem_alloc_used_read(), vmem_alloc_max);
|
||||
goto out;
|
||||
}
|
||||
|
||||
vmem_alloc_used_add(size);
|
||||
if (unlikely(vmem_alloc_used_read() > vmem_alloc_max))
|
||||
vmem_alloc_max = vmem_alloc_used_read();
|
||||
|
||||
INIT_HLIST_NODE(&dptr->kd_hlist);
|
||||
INIT_LIST_HEAD(&dptr->kd_list);
|
||||
|
||||
dptr->kd_addr = ptr;
|
||||
dptr->kd_size = size;
|
||||
dptr->kd_line = line;
|
||||
|
||||
spin_lock_irqsave(&vmem_lock, irq_flags);
|
||||
hlist_add_head(&dptr->kd_hlist,
|
||||
&vmem_table[hash_ptr(ptr, VMEM_HASH_BITS)]);
|
||||
list_add_tail(&dptr->kd_list, &vmem_list);
|
||||
spin_unlock_irqrestore(&vmem_lock, irq_flags);
|
||||
}
|
||||
out:
|
||||
return (ptr);
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_alloc_impl(size, flags, NUMA_NO_NODE));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_alloc_debug(size, flags, NUMA_NO_NODE));
|
||||
#else
|
||||
return (spl_kmem_alloc_track(size, flags, func, line, NUMA_NO_NODE));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(vmem_alloc_track);
|
||||
EXPORT_SYMBOL(spl_vmem_zalloc);
|
||||
|
||||
void
|
||||
vmem_free_track(const void *ptr, size_t size)
|
||||
spl_vmem_free(const void *buf, size_t size)
|
||||
{
|
||||
kmem_debug_t *dptr;
|
||||
|
||||
ASSERTF(ptr || size > 0, "ptr: %p, size: %llu", ptr,
|
||||
(unsigned long long) size);
|
||||
|
||||
/* Must exist in hash due to vmem_alloc() */
|
||||
dptr = kmem_del_init(&vmem_lock, vmem_table, VMEM_HASH_BITS, ptr);
|
||||
ASSERT(dptr);
|
||||
|
||||
/* Size must match */
|
||||
ASSERTF(dptr->kd_size == size, "kd_size (%llu) != size (%llu), "
|
||||
"kd_func = %s, kd_line = %d\n", (unsigned long long) dptr->kd_size,
|
||||
(unsigned long long) size, dptr->kd_func, dptr->kd_line);
|
||||
|
||||
vmem_alloc_used_sub(size);
|
||||
kfree(dptr->kd_func);
|
||||
|
||||
memset((void *)dptr, 0x5a, sizeof (kmem_debug_t));
|
||||
kfree(dptr);
|
||||
|
||||
memset((void *)ptr, 0x5a, size);
|
||||
vfree(ptr);
|
||||
#if !defined(DEBUG_KMEM)
|
||||
return (spl_kmem_free_impl(buf, size));
|
||||
#elif !defined(DEBUG_KMEM_TRACKING)
|
||||
return (spl_kmem_free_debug(buf, size));
|
||||
#else
|
||||
return (spl_kmem_free_track(buf, size));
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(vmem_free_track);
|
||||
|
||||
#else /* DEBUG_KMEM_TRACKING */
|
||||
|
||||
void *
|
||||
vmem_alloc_debug(size_t size, int flags, const char *func, int line)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
ASSERT(flags & KM_SLEEP);
|
||||
|
||||
/* Use the correct allocator */
|
||||
if (flags & __GFP_ZERO) {
|
||||
ptr = vzalloc_nofail(size, flags & (~__GFP_ZERO));
|
||||
} else {
|
||||
ptr = vmalloc_nofail(size, flags);
|
||||
}
|
||||
|
||||
if (unlikely(ptr == NULL)) {
|
||||
printk(KERN_WARNING
|
||||
"vmem_alloc(%llu, 0x%x) at %s:%d failed (%lld/%llu)\n",
|
||||
(unsigned long long)size, flags, func, line,
|
||||
(unsigned long long)vmem_alloc_used_read(), vmem_alloc_max);
|
||||
} else {
|
||||
vmem_alloc_used_add(size);
|
||||
if (unlikely(vmem_alloc_used_read() > vmem_alloc_max))
|
||||
vmem_alloc_max = vmem_alloc_used_read();
|
||||
}
|
||||
|
||||
return (ptr);
|
||||
}
|
||||
EXPORT_SYMBOL(vmem_alloc_debug);
|
||||
|
||||
void
|
||||
vmem_free_debug(const void *ptr, size_t size)
|
||||
{
|
||||
ASSERT(ptr || size > 0);
|
||||
vmem_alloc_used_sub(size);
|
||||
vfree(ptr);
|
||||
}
|
||||
EXPORT_SYMBOL(vmem_free_debug);
|
||||
|
||||
#endif /* DEBUG_KMEM_TRACKING */
|
||||
#endif /* DEBUG_KMEM */
|
||||
|
||||
#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
|
||||
static char *
|
||||
spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
|
||||
{
|
||||
int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
|
||||
int i, flag = 1;
|
||||
|
||||
ASSERT(str != NULL && len >= 17);
|
||||
memset(str, 0, len);
|
||||
|
||||
/*
|
||||
* Check for a fully printable string, and while we are at
|
||||
* it place the printable characters in the passed buffer.
|
||||
*/
|
||||
for (i = 0; i < size; i++) {
|
||||
str[i] = ((char *)(kd->kd_addr))[i];
|
||||
if (isprint(str[i])) {
|
||||
continue;
|
||||
} else {
|
||||
/*
|
||||
* Minimum number of printable characters found
|
||||
* to make it worthwhile to print this as ascii.
|
||||
*/
|
||||
if (i > min)
|
||||
break;
|
||||
|
||||
flag = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!flag) {
|
||||
sprintf(str, "%02x%02x%02x%02x%02x%02x%02x%02x",
|
||||
*((uint8_t *)kd->kd_addr),
|
||||
*((uint8_t *)kd->kd_addr + 2),
|
||||
*((uint8_t *)kd->kd_addr + 4),
|
||||
*((uint8_t *)kd->kd_addr + 6),
|
||||
*((uint8_t *)kd->kd_addr + 8),
|
||||
*((uint8_t *)kd->kd_addr + 10),
|
||||
*((uint8_t *)kd->kd_addr + 12),
|
||||
*((uint8_t *)kd->kd_addr + 14));
|
||||
}
|
||||
|
||||
return (str);
|
||||
}
|
||||
|
||||
static int
|
||||
spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size)
|
||||
{
|
||||
int i;
|
||||
|
||||
spin_lock_init(lock);
|
||||
INIT_LIST_HEAD(list);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
INIT_HLIST_HEAD(&kmem_table[i]);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
|
||||
{
|
||||
unsigned long flags;
|
||||
kmem_debug_t *kd;
|
||||
char str[17];
|
||||
|
||||
spin_lock_irqsave(lock, flags);
|
||||
if (!list_empty(list))
|
||||
printk(KERN_WARNING "%-16s %-5s %-16s %s:%s\n", "address",
|
||||
"size", "data", "func", "line");
|
||||
|
||||
list_for_each_entry(kd, list, kd_list)
|
||||
printk(KERN_WARNING "%p %-5d %-16s %s:%d\n", kd->kd_addr,
|
||||
(int)kd->kd_size, spl_sprintf_addr(kd, str, 17, 8),
|
||||
kd->kd_func, kd->kd_line);
|
||||
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
#else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
|
||||
#define spl_kmem_init_tracking(list, lock, size)
|
||||
#define spl_kmem_fini_tracking(list, lock)
|
||||
#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
|
||||
EXPORT_SYMBOL(spl_vmem_free);
|
||||
|
||||
int
|
||||
spl_vmem_init(void)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
#ifdef DEBUG_KMEM
|
||||
vmem_alloc_used_set(0);
|
||||
spl_kmem_init_tracking(&vmem_list, &vmem_lock, VMEM_TABLE_SIZE);
|
||||
#endif
|
||||
|
||||
return (rc);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Tear down the vmem bookkeeping.
 *
 * With DEBUG_KMEM defined, a warning is logged if the vmem usage counter
 * is non-zero, and spl_kmem_fini_tracking() is then called for
 * vmem_list/vmem_lock.  Without DEBUG_KMEM this function does nothing.
 */
void
spl_vmem_fini(void)
{
#ifdef DEBUG_KMEM
	/*
	 * Dump every vmem address that was never released, together with
	 * its allocation size and the leading bytes stored there, as a
	 * debugging aid.  This runs at module unload, so speed is not a
	 * concern.
	 */
	if (vmem_alloc_used_read()) {
		printk(KERN_WARNING "vmem leaked %ld/%llu bytes\n",
		    vmem_alloc_used_read(), vmem_alloc_max);
	}

	spl_kmem_fini_tracking(&vmem_list, &vmem_lock);
#endif /* DEBUG_KMEM */
}
|
||||
|
@ -95,11 +95,11 @@ splat_kmem_test1(struct file *file, void *arg)
|
||||
int size = PAGE_SIZE;
|
||||
int i, count, rc = 0;
|
||||
|
||||
while ((!rc) && (size <= (PAGE_SIZE * 32))) {
|
||||
while ((!rc) && (size <= spl_kmem_alloc_warn)) {
|
||||
count = 0;
|
||||
|
||||
for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) {
|
||||
ptr[i] = kmem_alloc(size, KM_SLEEP | KM_NODEBUG);
|
||||
ptr[i] = kmem_alloc(size, KM_SLEEP);
|
||||
if (ptr[i])
|
||||
count++;
|
||||
}
|
||||
@ -127,11 +127,11 @@ splat_kmem_test2(struct file *file, void *arg)
|
||||
int size = PAGE_SIZE;
|
||||
int i, j, count, rc = 0;
|
||||
|
||||
while ((!rc) && (size <= (PAGE_SIZE * 32))) {
|
||||
while ((!rc) && (size <= spl_kmem_alloc_warn)) {
|
||||
count = 0;
|
||||
|
||||
for (i = 0; i < SPLAT_KMEM_ALLOC_COUNT; i++) {
|
||||
ptr[i] = kmem_zalloc(size, KM_SLEEP | KM_NODEBUG);
|
||||
ptr[i] = kmem_zalloc(size, KM_SLEEP);
|
||||
if (ptr[i])
|
||||
count++;
|
||||
}
|
||||
@ -171,7 +171,11 @@ splat_kmem_test3(struct file *file, void *arg)
|
||||
int size = PAGE_SIZE;
|
||||
int i, count, rc = 0;
|
||||
|
||||
while ((!rc) && (size <= (PAGE_SIZE * 1024))) {
|
||||
/*
|
||||
* Test up to 4x the maximum kmem_alloc() size to ensure both
|
||||
* the kmem_alloc() and vmem_alloc() call paths are used.
|
||||
*/
|
||||
while ((!rc) && (size <= (4 * spl_kmem_alloc_max))) {
|
||||
count = 0;
|
||||
|
||||
for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) {
|
||||
@ -203,7 +207,11 @@ splat_kmem_test4(struct file *file, void *arg)
|
||||
int size = PAGE_SIZE;
|
||||
int i, j, count, rc = 0;
|
||||
|
||||
while ((!rc) && (size <= (PAGE_SIZE * 1024))) {
|
||||
/*
|
||||
* Test up to 4x the maximum kmem_zalloc() size to ensure both
|
||||
* the kmem_zalloc() and vmem_zalloc() call paths are used.
|
||||
*/
|
||||
while ((!rc) && (size <= (4 * spl_kmem_alloc_max))) {
|
||||
count = 0;
|
||||
|
||||
for (i = 0; i < SPLAT_VMEM_ALLOC_COUNT; i++) {
|
||||
|
Loading…
Reference in New Issue
Block a user