Further slab improvements; I'm getting close to something which works
well for the expected workloads.  Improvements in this commit include:

- Added a DEBUG_KMEM_TRACKING #define which can optionally be set
  when DEBUG_KMEM is defined to enable per-allocation tracking.  This
  lets the lightweight kmem accounting stay enabled by default, while
  the heavier per-allocation tracking is only turned on briefly when
  hunting a memory leak.

- Added set_normalized_timespec() to the SPL to simplify using the
  timespec() primitives from within a module; a usage sketch follows
  this list.

- Added per-spinlock cycle counters to the slab in an attempt to
  run down a lock contention issue.  The contended lock turned out
  to be in vmalloc(), but I'm going to leave the cycle counters in
  place for a while until I'm convinced there aren't other locking
  improvements possible in the slab.

- Added a proc interface to the slab to export per-slab-cache
  statistics to /proc/spl/kmem/slab for analysis.

- Reworked the spl_slab_alloc() function to allocate from kmem for
  small allocations and from vmem for large allocations.  This
  improved things considerably, but further work is needed.
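
  A minimal, hypothetical usage sketch (not part of this change) of
  the kind of in-module timing code the exported
  set_normalized_timespec() enables; timespec_sub() is the inline
  helper in <linux/time.h> that depends on it:

    /* Hypothetical example module code: time an operation using the
     * kernel timespec primitives.  timespec_sub() is a static inline
     * that calls set_normalized_timespec(), which is why the SPL now
     * provides and exports its own copy of that function. */
    #include <linux/kernel.h>
    #include <linux/time.h>

    static void example_timed_op(void)
    {
            struct timespec start, stop, delta;

            getnstimeofday(&start);
            /* ... the operation being timed ... */
            getnstimeofday(&stop);

            delta = timespec_sub(stop, start);
            printk(KERN_INFO "example op took %ld.%09ld seconds\n",
                   (long)delta.tv_sec, delta.tv_nsec);
    }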



git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@138 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
behlendo 2008-06-27 21:40:11 +00:00
parent e9d7a2bef5
commit ff449ac406
5 changed files with 359 additions and 97 deletions


@@ -32,6 +32,7 @@ extern "C" {
#endif
#undef DEBUG_KMEM_UNIMPLEMENTED
#undef DEBUG_KMEM_TRACKING /* Per-allocation memory tracking */
#include <linux/module.h>
#include <linux/slab.h>
@@ -58,10 +59,9 @@ extern atomic64_t kmem_alloc_used;
extern unsigned long kmem_alloc_max;
extern atomic64_t vmem_alloc_used;
extern unsigned long vmem_alloc_max;
extern int kmem_warning_flag;
extern atomic64_t kmem_cache_alloc_failed;
#ifdef DEBUG_KMEM_TRACKING
/* XXX - Not to surprisingly with debugging enabled the xmem_locks are very
* highly contended particularly on xfree(). If we want to run with this
* detailed debugging enabled for anything other than debugging we need to
@@ -171,9 +171,6 @@ __kmem_del_init(spinlock_t *lock,struct hlist_head *table,int bits,void *addr)
_ptr_; \
})
#define kmem_alloc(size, flags) __kmem_alloc((size), (flags), kmalloc)
#define kmem_zalloc(size, flags) __kmem_alloc((size), (flags), kzalloc)
#define kmem_free(ptr, size) \
({ \
kmem_debug_t *_dptr_; \
@@ -252,10 +249,6 @@ __kmem_del_init(spinlock_t *lock,struct hlist_head *table,int bits,void *addr)
_ptr_; \
})
#define vmem_alloc(size, flags) __vmem_alloc((size), (flags))
#define vmem_zalloc(size, flags) __vmem_alloc((size), ((flags) | \
__GFP_ZERO))
#define vmem_free(ptr, size) \
({ \
kmem_debug_t *_dptr_; \
@@ -278,6 +271,108 @@ __kmem_del_init(spinlock_t *lock,struct hlist_head *table,int bits,void *addr)
vfree(ptr); \
})
#else /* DEBUG_KMEM_TRACKING */
#define __kmem_alloc(size, flags, allocator) \
({ void *_ptr_ = NULL; \
\
/* Marked unlikely because we should never be doing this, */ \
/* we tolerate to up 2 pages but a single page is best. */ \
if (unlikely((size) > (PAGE_SIZE * 2)) && kmem_warning_flag) \
__CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning large " \
"kmem_alloc(%d, 0x%x) (%ld/%ld)\n", \
(int)(size), (int)(flags), \
atomic64_read(&kmem_alloc_used), \
kmem_alloc_max); \
\
_ptr_ = (void *)allocator((size), (flags)); \
if (_ptr_ == NULL) { \
__CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning " \
"kmem_alloc(%d, 0x%x) failed (%ld/" \
"%ld)\n", (int)(size), (int)(flags), \
atomic64_read(&kmem_alloc_used), \
kmem_alloc_max); \
} else { \
atomic64_add((size), &kmem_alloc_used); \
if (unlikely(atomic64_read(&kmem_alloc_used) > \
kmem_alloc_max)) \
kmem_alloc_max = \
atomic64_read(&kmem_alloc_used); \
\
__CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_alloc(%d, 0x%x) = %p " \
"(%ld/%ld)\n", (int)(size), (int)(flags), \
_ptr_, atomic64_read(&kmem_alloc_used), \
kmem_alloc_max); \
} \
\
_ptr_; \
})
#define kmem_free(ptr, size) \
({ \
ASSERT((ptr) || (size > 0)); \
\
atomic64_sub((size), &kmem_alloc_used); \
__CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_free(%p, %d) (%ld/%ld)\n", \
(ptr), (int)(size), atomic64_read(&kmem_alloc_used), \
kmem_alloc_max); \
memset(ptr, 0x5a, (size)); \
kfree(ptr); \
})
#define __vmem_alloc(size, flags) \
({ void *_ptr_ = NULL; \
\
ASSERT((flags) & KM_SLEEP); \
\
_ptr_ = (void *)__vmalloc((size), (((flags) | \
__GFP_HIGHMEM) & ~__GFP_ZERO), PAGE_KERNEL);\
if (_ptr_ == NULL) { \
__CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning " \
"vmem_alloc(%d, 0x%x) failed (%ld/" \
"%ld)\n", (int)(size), (int)(flags), \
atomic64_read(&vmem_alloc_used), \
vmem_alloc_max); \
} else { \
if (flags & __GFP_ZERO) \
memset(_ptr_, 0, (size)); \
\
atomic64_add((size), &vmem_alloc_used); \
if (unlikely(atomic64_read(&vmem_alloc_used) > \
vmem_alloc_max)) \
vmem_alloc_max = \
atomic64_read(&vmem_alloc_used); \
\
__CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_alloc(" \
"%d, 0x%x) = %p (%ld/%ld)\n", \
(int)(size), (int)(flags), _ptr_, \
atomic64_read(&vmem_alloc_used), \
vmem_alloc_max); \
} \
\
_ptr_; \
})
#define vmem_free(ptr, size) \
({ \
ASSERT((ptr) || (size > 0)); \
\
atomic64_sub((size), &vmem_alloc_used); \
__CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_free(%p, %d) (%ld/%ld)\n", \
(ptr), (int)(size), atomic64_read(&vmem_alloc_used), \
vmem_alloc_max); \
memset(ptr, 0x5a, (size)); \
vfree(ptr); \
})
#endif /* DEBUG_KMEM_TRACKING */
#define kmem_alloc(size, flags) __kmem_alloc((size), (flags), kmalloc)
#define kmem_zalloc(size, flags) __kmem_alloc((size), (flags), kzalloc)
#define vmem_alloc(size, flags) __vmem_alloc((size), (flags))
#define vmem_zalloc(size, flags) __vmem_alloc((size), ((flags) | __GFP_ZERO))
#else /* DEBUG_KMEM */
#define kmem_alloc(size, flags) kmalloc((size), (flags))
@@ -359,6 +454,8 @@ kmem_debugging(void)
extern int kmem_set_warning(int flag);
extern struct list_head spl_kmem_cache_list;
extern struct rw_semaphore spl_kmem_cache_sem;
#define SKM_MAGIC 0x2e2e2e2e
#define SKO_MAGIC 0x20202020
@@ -442,6 +539,11 @@ typedef struct spl_kmem_cache {
uint64_t skc_obj_max; /* Obj max historic */
uint64_t skc_hash_depth; /* Lazy hash depth */
uint64_t skc_hash_count; /* Hash entries current */
cycles_t skc_lock_reclaim;
cycles_t skc_lock_destroy;
cycles_t skc_lock_grow;
cycles_t skc_lock_refill;
cycles_t skc_lock_flush;
} spl_kmem_cache_t;
extern spl_kmem_cache_t *


@@ -48,8 +48,14 @@ unsigned long kmem_alloc_max = 0;
atomic64_t vmem_alloc_used;
unsigned long vmem_alloc_max = 0;
int kmem_warning_flag = 1;
atomic64_t kmem_cache_alloc_failed;
EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
EXPORT_SYMBOL(vmem_alloc_used);
EXPORT_SYMBOL(vmem_alloc_max);
EXPORT_SYMBOL(kmem_warning_flag);
#ifdef DEBUG_KMEM_TRACKING
spinlock_t kmem_lock;
struct hlist_head kmem_table[KMEM_TABLE_SIZE];
struct list_head kmem_list;
@@ -58,12 +64,6 @@ spinlock_t vmem_lock;
struct hlist_head vmem_table[VMEM_TABLE_SIZE];
struct list_head vmem_list;
EXPORT_SYMBOL(kmem_alloc_used);
EXPORT_SYMBOL(kmem_alloc_max);
EXPORT_SYMBOL(vmem_alloc_used);
EXPORT_SYMBOL(vmem_alloc_max);
EXPORT_SYMBOL(kmem_warning_flag);
EXPORT_SYMBOL(kmem_lock);
EXPORT_SYMBOL(kmem_table);
EXPORT_SYMBOL(kmem_list);
@@ -71,6 +71,7 @@ EXPORT_SYMBOL(kmem_list);
EXPORT_SYMBOL(vmem_lock);
EXPORT_SYMBOL(vmem_table);
EXPORT_SYMBOL(vmem_list);
#endif
int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); }
#else
@@ -109,8 +110,6 @@ EXPORT_SYMBOL(kmem_set_warning);
* small virtual address space on 32bit arches. This will seriously
* constrain the size of the slab caches and their performance.
*
* XXX: Implement SPL proc interface to export full per cache stats.
*
* XXX: Implement work requests to keep an eye on each cache and
* shrink them via spl_slab_reclaim() when they are wasting lots
* of space. Currently this process is driven by the reapers.
@@ -149,10 +148,10 @@ EXPORT_SYMBOL(kmem_set_warning);
#undef kmem_cache_alloc
#undef kmem_cache_free
static struct list_head spl_kmem_cache_list; /* List of caches */
struct list_head spl_kmem_cache_list; /* List of caches */
static struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
static kmem_cache_t *spl_slab_cache; /* Cache for slab structs */
static kmem_cache_t *spl_obj_cache; /* Cache for obj structs */
static int spl_cache_flush(spl_kmem_cache_t *skc,
spl_kmem_magazine_t *skm, int flush);
@@ -206,7 +205,17 @@ out_alloc:
GOTO(out, sks = NULL);
}
sko->sko_addr = vmem_alloc(skc->skc_obj_size, flags);
/* Objects less than a page can use kmem_alloc() and avoid
* the locking overhead in __get_vm_area_node() when locking
* for a free address. For objects over a page we use
* vmem_alloc() because it is usually worth paying this
* overhead to avoid the need to find contigeous pages.
* This should give us the best of both worlds. */
if (skc->skc_obj_size <= PAGE_SIZE)
sko->sko_addr = kmem_alloc(skc->skc_obj_size, flags);
else
sko->sko_addr = vmem_alloc(skc->skc_obj_size, flags);
if (sko->sko_addr == NULL) {
kmem_cache_free(spl_obj_cache, sko);
GOTO(out_alloc, sks = NULL);
@@ -248,7 +257,11 @@ spl_slab_free(spl_kmem_slab_t *sks) {
if (skc->skc_dtor)
skc->skc_dtor(sko->sko_addr, skc->skc_private);
vmem_free(sko->sko_addr, skc->skc_obj_size);
if (skc->skc_obj_size <= PAGE_SIZE)
kmem_free(sko->sko_addr, skc->skc_obj_size);
else
vmem_free(sko->sko_addr, skc->skc_obj_size);
list_del(&sko->sko_list);
kmem_cache_free(spl_obj_cache, sko);
i++;
@@ -292,13 +305,18 @@ __spl_slab_reclaim(spl_kmem_cache_t *skc)
static int
spl_slab_reclaim(spl_kmem_cache_t *skc)
{
cycles_t start;
int rc;
ENTRY;
spin_lock(&skc->skc_lock);
start = get_cycles();
rc = __spl_slab_reclaim(skc);
spin_unlock(&skc->skc_lock);
if (unlikely((get_cycles() - start) > skc->skc_lock_reclaim))
skc->skc_lock_reclaim = get_cycles() - start;
RETURN(rc);
}
@@ -311,17 +329,15 @@ spl_magazine_size(spl_kmem_cache_t *skc)
/* Guesses for reasonable magazine sizes, they
* should really adapt based on observed usage. */
if (skc->skc_obj_size > (PAGE_SIZE * 256))
size = 1;
else if (skc->skc_obj_size > (PAGE_SIZE * 32))
size = 4;
else if (skc->skc_obj_size > (PAGE_SIZE))
else if (skc->skc_obj_size > (PAGE_SIZE * 32))
size = 16;
else if (skc->skc_obj_size > (PAGE_SIZE / 4))
else if (skc->skc_obj_size > (PAGE_SIZE))
size = 32;
else if (skc->skc_obj_size > (PAGE_SIZE / 16))
size = 48;
else
size = 64;
else if (skc->skc_obj_size > (PAGE_SIZE / 4))
size = 128;
else
size = 512;
RETURN(size);
}
@@ -412,7 +428,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
kmem_flags = KM_NOSLEEP;
/* Allocate new cache memory and initialize. */
skc = (spl_kmem_cache_t *)kmem_alloc(sizeof(*skc), kmem_flags);
skc = (spl_kmem_cache_t *)kmem_zalloc(sizeof(*skc), kmem_flags);
if (skc == NULL)
RETURN(NULL);
@@ -441,7 +457,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_hash_size = SPL_KMEM_CACHE_HASH_SIZE;
skc->skc_hash_elts = SPL_KMEM_CACHE_HASH_ELTS;
skc->skc_hash = (struct hlist_head *)
kmem_alloc(skc->skc_hash_size, kmem_flags);
vmem_alloc(skc->skc_hash_size, kmem_flags);
if (skc->skc_hash == NULL) {
kmem_free(skc->skc_name, skc->skc_name_size);
kmem_free(skc, sizeof(*skc));
@@ -466,10 +482,15 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
skc->skc_obj_max = 0;
skc->skc_hash_depth = 0;
skc->skc_hash_count = 0;
skc->skc_lock_reclaim = 0;
skc->skc_lock_destroy = 0;
skc->skc_lock_grow = 0;
skc->skc_lock_refill = 0;
skc->skc_lock_flush = 0;
rc = spl_magazine_create(skc);
if (rc) {
kmem_free(skc->skc_hash, skc->skc_hash_size);
vmem_free(skc->skc_hash, skc->skc_hash_size);
kmem_free(skc->skc_name, skc->skc_name_size);
kmem_free(skc, sizeof(*skc));
RETURN(NULL);
@@ -490,6 +511,7 @@ void
spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
{
spl_kmem_slab_t *sks, *m;
cycles_t start;
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -500,6 +522,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
spl_magazine_destroy(skc);
spin_lock(&skc->skc_lock);
start = get_cycles();
/* Validate there are no objects in use and free all the
* spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */
@@ -510,9 +533,13 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list)
spl_slab_free(sks);
kmem_free(skc->skc_hash, skc->skc_hash_size);
vmem_free(skc->skc_hash, skc->skc_hash_size);
kmem_free(skc->skc_name, skc->skc_name_size);
spin_unlock(&skc->skc_lock);
if (unlikely((get_cycles() - start) > skc->skc_lock_destroy))
skc->skc_lock_destroy = get_cycles() - start;
kmem_free(skc, sizeof(*skc));
EXIT;
@@ -603,6 +630,7 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags)
{
spl_kmem_slab_t *sks;
spl_kmem_obj_t *sko;
cycles_t start;
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -634,11 +662,16 @@
/* Link the new empty slab in to the end of skc_partial_list */
spin_lock(&skc->skc_lock);
start = get_cycles();
skc->skc_slab_total++;
skc->skc_obj_total += sks->sks_objs;
list_add_tail(&sks->sks_list, &skc->skc_partial_list);
spin_unlock(&skc->skc_lock);
if (unlikely((get_cycles() - start) > skc->skc_lock_grow))
skc->skc_lock_grow = get_cycles() - start;
RETURN(sks);
}
@@ -647,6 +680,7 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
{
spl_kmem_slab_t *sks;
int rc = 0, refill;
cycles_t start;
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -656,10 +690,16 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail);
spin_lock(&skc->skc_lock);
start = get_cycles();
while (refill > 0) {
/* No slabs available we must grow the cache */
if (list_empty(&skc->skc_partial_list)) {
spin_unlock(&skc->skc_lock);
if (unlikely((get_cycles() - start) > skc->skc_lock_refill))
skc->skc_lock_refill = get_cycles() - start;
sks = spl_cache_grow(skc, flags);
if (!sks)
GOTO(out, rc);
@@ -674,6 +714,7 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
refill = MIN(refill, skm->skm_size - skm->skm_avail);
spin_lock(&skc->skc_lock);
start = get_cycles();
continue;
}
@@ -700,6 +741,9 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
}
spin_unlock(&skc->skc_lock);
if (unlikely((get_cycles() - start) > skc->skc_lock_refill))
skc->skc_lock_refill = get_cycles() - start;
out:
/* Returns the number of entries added to cache */
RETURN(rc);
@@ -716,8 +760,8 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
ASSERT(spin_is_locked(&skc->skc_lock));
sko = spl_hash_obj(skc, obj);
ASSERTF(sko, "Obj %p missing from in-use hash (%d) for cache %s\n",
ASSERTF(sko, "Obj %p missing from in-use hash (%d/%d) for cache %s\n",
obj, skc->skc_hash_count, skc->skc_name);
obj, skc->skc_hash_depth, skc->skc_hash_count, skc->skc_name);
sks = sko->sko_slab;
ASSERTF(sks, "Obj %p/%p linked to invalid slab for cache %s\n",
@@ -755,12 +799,15 @@ static int
spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
{
int i, count = MIN(flush, skm->skm_avail);
cycles_t start;
ENTRY;
ASSERT(skc->skc_magic == SKC_MAGIC);
ASSERT(skm->skm_magic == SKM_MAGIC);
spin_lock(&skc->skc_lock);
start = get_cycles();
for (i = 0; i < count; i++)
spl_cache_shrink(skc, skm->skm_objs[i]);
@@ -771,6 +818,9 @@ spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
spin_unlock(&skc->skc_lock);
if (unlikely((get_cycles() - start) > skc->skc_lock_flush))
skc->skc_lock_flush = get_cycles() - start;
RETURN(count);
}
@@ -942,11 +992,11 @@ spl_kmem_init(void)
#endif
#ifdef DEBUG_KMEM
{ int i;
atomic64_set(&kmem_alloc_used, 0);
atomic64_set(&vmem_alloc_used, 0);
atomic64_set(&kmem_cache_alloc_failed, 0);
#ifdef DEBUG_KMEM_TRACKING
{ int i;
spin_lock_init(&kmem_lock);
INIT_LIST_HEAD(&kmem_list);
@@ -959,6 +1009,7 @@ spl_kmem_init(void)
for (i = 0; i < VMEM_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&vmem_table[i]);
}
#endif
#endif
RETURN(rc);
@@ -972,7 +1023,7 @@ out_cache:
RETURN(rc);
}
#ifdef DEBUG_KMEM
#if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
static char *
spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
{
@@ -1013,16 +1064,35 @@ spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
return str;
}
#endif /* DEBUG_KMEM */
static void
spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
{
unsigned long flags;
kmem_debug_t *kd;
char str[17];
spin_lock_irqsave(lock, flags);
if (!list_empty(list))
CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
"address", "size", "data", "func", "line");
list_for_each_entry(kd, list, kd_list)
CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
kd->kd_addr, kd->kd_size,
spl_sprintf_addr(kd, str, 17, 8),
kd->kd_func, kd->kd_line);
spin_unlock_irqrestore(lock, flags);
}
#else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
#define spl_kmem_fini_tracking(list, lock)
#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
void
spl_kmem_fini(void)
{
#ifdef DEBUG_KMEM
unsigned long flags;
kmem_debug_t *kd;
char str[17];
/* Display all unreclaimed memory addresses, including the
* allocation size and the first few bytes of what's located
* at that address to aid in debugging. Performance is not
@@ -1031,36 +1101,14 @@ spl_kmem_fini(void)
CWARN("kmem leaked %ld/%ld bytes\n",
atomic_read(&kmem_alloc_used), kmem_alloc_max);
spin_lock_irqsave(&kmem_lock, flags);
if (!list_empty(&kmem_list))
CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
"address", "size", "data", "func", "line");
list_for_each_entry(kd, &kmem_list, kd_list)
CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
kd->kd_addr, kd->kd_size,
spl_sprintf_addr(kd, str, 17, 8),
kd->kd_func, kd->kd_line);
spin_unlock_irqrestore(&kmem_lock, flags);
if (atomic64_read(&vmem_alloc_used) != 0)
CWARN("vmem leaked %ld/%ld bytes\n",
atomic_read(&vmem_alloc_used), vmem_alloc_max);
spin_lock_irqsave(&vmem_lock, flags);
if (!list_empty(&vmem_list))
CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n",
"address", "size", "data", "func", "line");
list_for_each_entry(kd, &vmem_list, kd_list)
CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
kd->kd_addr, kd->kd_size,
spl_sprintf_addr(kd, str, 17, 8),
kd->kd_func, kd->kd_line);
spin_unlock_irqrestore(&vmem_lock, flags);
#endif
spl_kmem_fini_tracking(&kmem_list, &kmem_lock);
spl_kmem_fini_tracking(&vmem_list, &vmem_lock);
#endif /* DEBUG_KMEM */
ENTRY;
#ifdef HAVE_SET_SHRINKER


@@ -49,6 +49,7 @@ static struct proc_dir_entry *proc_spl_mutex_stats = NULL;
#endif /* DEBUG_MUTEX */
#ifdef DEBUG_KMEM
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
#endif /* DEBUG_KMEM */
#ifdef DEBUG_KSTAT
struct proc_dir_entry *proc_spl_kstat = NULL;
@@ -131,7 +132,6 @@ enum {
CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */
CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */
CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */
CTL_KMEM_ALLOC_FAILED, /* Cache allocation failed */
#endif
CTL_MUTEX_STATS, /* Global mutex statistics */
@@ -561,6 +561,112 @@ static struct file_operations proc_mutex_operations = {
};
#endif /* DEBUG_MUTEX */
#ifdef DEBUG_KMEM
static void
slab_seq_show_headers(struct seq_file *f)
{
seq_printf(f, "%-36s\n", "name");
}
static int
slab_seq_show(struct seq_file *f, void *p)
{
spl_kmem_cache_t *skc = p;
ASSERT(skc->skc_magic == SKC_MAGIC);
spin_lock(&skc->skc_lock);
seq_printf(f, "%-36s ", skc->skc_name);
seq_printf(f, "%u %u %u - %u %u %u - "
"%lu %lu %lu - %lu %lu %lu - %lu %lu %lu - %lu %lu - "
"%llu %llu %llu %llu %llu\n",
(unsigned)skc->skc_obj_size,
(unsigned)skc->skc_chunk_size,
(unsigned)skc->skc_slab_size,
(unsigned)skc->skc_hash_bits,
(unsigned)skc->skc_hash_size,
(unsigned)skc->skc_hash_elts,
(long unsigned)skc->skc_slab_fail,
(long unsigned)skc->skc_slab_create,
(long unsigned)skc->skc_slab_destroy,
(long unsigned)skc->skc_slab_total,
(long unsigned)skc->skc_slab_alloc,
(long unsigned)skc->skc_slab_max,
(long unsigned)skc->skc_obj_total,
(long unsigned)skc->skc_obj_alloc,
(long unsigned)skc->skc_obj_max,
(long unsigned)skc->skc_hash_depth,
(long unsigned)skc->skc_hash_count,
(long long unsigned)skc->skc_lock_reclaim,
(long long unsigned)skc->skc_lock_destroy,
(long long unsigned)skc->skc_lock_grow,
(long long unsigned)skc->skc_lock_refill,
(long long unsigned)skc->skc_lock_flush);
spin_unlock(&skc->skc_lock);
return 0;
}
static void *
slab_seq_start(struct seq_file *f, loff_t *pos)
{
struct list_head *p;
loff_t n = *pos;
ENTRY;
down_read(&spl_kmem_cache_sem);
if (!n)
slab_seq_show_headers(f);
p = spl_kmem_cache_list.next;
while (n--) {
p = p->next;
if (p == &spl_kmem_cache_list)
RETURN(NULL);
}
RETURN(list_entry(p, spl_kmem_cache_t, skc_list));
}
static void *
slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
{
spl_kmem_cache_t *skc = p;
ENTRY;
++*pos;
RETURN((skc->skc_list.next == &spl_kmem_cache_list) ?
NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list));
}
static void
slab_seq_stop(struct seq_file *f, void *v)
{
up_read(&spl_kmem_cache_sem);
}
static struct seq_operations slab_seq_ops = {
.show = slab_seq_show,
.start = slab_seq_start,
.next = slab_seq_next,
.stop = slab_seq_stop,
};
static int
proc_slab_open(struct inode *inode, struct file *filp)
{
return seq_open(filp, &slab_seq_ops);
}
static struct file_operations proc_slab_operations = {
.open = proc_slab_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif /* DEBUG_KMEM */
static struct ctl_table spl_debug_table[] = {
{
.ctl_name = CTL_DEBUG_SUBSYS,
@@ -735,14 +841,6 @@ static struct ctl_table spl_kmem_table[] = {
.mode = 0444,
.proc_handler = &proc_doulongvec_minmax,
},
{
.ctl_name = CTL_KMEM_ALLOC_FAILED,
.procname = "kmem_alloc_failed",
.data = &kmem_cache_alloc_failed,
.maxlen = sizeof(atomic64_t),
.mode = 0444,
.proc_handler = &proc_doatomic64,
},
{0},
};
#endif /* DEBUG_KMEM */
@@ -901,6 +999,12 @@ proc_init(void)
proc_spl_kmem = proc_mkdir("kmem", proc_spl);
if (proc_spl_kmem == NULL)
GOTO(out, rc = -EUNATCH);
proc_spl_kmem_slab = create_proc_entry("slab", 0444, proc_spl_kmem);
if (proc_spl_kmem_slab == NULL)
GOTO(out, rc = -EUNATCH);
proc_spl_kmem_slab->proc_fops = &proc_slab_operations;
#endif /* DEBUG_KMEM */
#ifdef DEBUG_KSTAT
@@ -912,6 +1016,9 @@ proc_init(void)
out:
if (rc) {
remove_proc_entry("kstat", proc_spl);
#ifdef DEBUG_KMEM
remove_proc_entry("slab", proc_spl_kmem);
#endif
remove_proc_entry("kmem", proc_spl); remove_proc_entry("kmem", proc_spl);
#ifdef DEBUG_MUTEX #ifdef DEBUG_MUTEX
remove_proc_entry("stats_per", proc_spl_mutex); remove_proc_entry("stats_per", proc_spl_mutex);
@@ -934,6 +1041,9 @@ proc_fini(void)
#if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT)
remove_proc_entry("kstat", proc_spl);
#ifdef DEBUG_KMEM
remove_proc_entry("slab", proc_spl_kmem);
#endif
remove_proc_entry("kmem", proc_spl); remove_proc_entry("kmem", proc_spl);
#ifdef DEBUG_MUTEX #ifdef DEBUG_MUTEX
remove_proc_entry("stats_per", proc_spl_mutex); remove_proc_entry("stats_per", proc_spl_mutex);


@@ -66,3 +66,22 @@ __gethrtime(void) {
return rc;
}
EXPORT_SYMBOL(__gethrtime);
/* Not exported from the kernel, but we need it for timespec_sub. Be very
* careful here we are using the kernel prototype, so that must not change.
*/
void
set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
{
while (nsec >= NSEC_PER_SEC) {
nsec -= NSEC_PER_SEC;
++sec;
}
while (nsec < 0) {
nsec += NSEC_PER_SEC;
--sec;
}
ts->tv_sec = sec;
ts->tv_nsec = nsec;
}
EXPORT_SYMBOL(set_normalized_timespec);


@@ -65,23 +65,6 @@
#define SPLAT_KMEM_ALLOC_COUNT 10
#define SPLAT_VMEM_ALLOC_COUNT 10
/* Not exported from the kernel, but we need it for timespec_sub. Be very
* * careful here we are using the kernel prototype, so that must not change.
* */
void
set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
{
while (nsec >= NSEC_PER_SEC) {
nsec -= NSEC_PER_SEC;
++sec;
}
while (nsec < 0) {
nsec += NSEC_PER_SEC;
--sec;
}
ts->tv_sec = sec;
ts->tv_nsec = nsec;
}
/* XXX - This test may fail under tight memory conditions */
static int