Improved vmem cached deadlock detection

The entire goal of performing the slab allocations asynchronously
is to be able to detect when a vmalloc() deadlocks.  In this case,
and only in this case, do we want to start allocating emergency
objects.  The trick here is to minimize false positives, because
the overhead of tracking emergency objects is far higher than that
of normal slab objects.

With that goal in mind, the code was reworked to be less sensitive
to slow allocations by increasing the wait time.  Once a cache is
marked deadlocked, all subsequent allocations which cannot be
satisfied with existing cache objects will immediately allocate new
emergency objects.  This behavior persists until the asynchronous
allocation completes and clears the deadlocked flag.
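
In sketch form, the detection is a handshake between the allocating
thread and the asynchronous grow worker.  The condensed logic below
is lifted from the spl_cache_grow() and spl_cache_grow_work() hunks
in this commit, with error paths and SPL debug macros omitted:

	/* Allocator side (spl_cache_grow): wait up to one second (HZ
	 * jiffies) for the asynchronous grow to complete; if it is
	 * still pending, flag the cache as deadlocked. */
	if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags)) {
		/* Already deadlocked: go straight to an emergency object. */
		rc = spl_emergency_alloc(skc, flags, obj);
	} else {
		remaining = wait_event_timeout(skc->skc_waitq,
		    spl_cache_grow_wait(skc), HZ);

		if (!remaining && test_bit(KMC_BIT_VMEM, &skc->skc_flags)) {
			spin_lock(&skc->skc_lock);
			/* Only flag a grow that is genuinely still running. */
			if (test_bit(KMC_BIT_GROWING, &skc->skc_flags)) {
				set_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
				skc->skc_obj_deadlock++;
			}
			spin_unlock(&skc->skc_lock);
		}

		rc = -ENOMEM;
	}

	/* Worker side (spl_cache_grow_work): completing the grow clears
	 * both bits and wakes all waiters, which ends emergency mode. */
	clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
	clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
	wake_up_all(&skc->skc_waitq);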

The result of these tweaks is that far fewer emergency objects
get created, which is important because it minimizes the cost of
releasing them later in kmem_cache_free().

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>

commit 165f13c33a
parent 65c2fc5a2e
Author: Brian Behlendorf <behlendorf1@llnl.gov>
Date:   2012-10-29 16:51:59 -0700
3 changed files with 34 additions and 13 deletions

include/sys/kmem.h

@@ -340,6 +340,7 @@ enum {
 	KMC_BIT_VMEM		= 6,	/* Use vmem cache */
 	KMC_BIT_OFFSLAB		= 7,	/* Objects not on slab */
 	KMC_BIT_NOEMERGENCY	= 8,	/* Disable emergency objects */
+	KMC_BIT_DEADLOCKED	= 14,	/* Deadlock detected */
 	KMC_BIT_GROWING		= 15,	/* Growing in progress */
 	KMC_BIT_REAPING		= 16,	/* Reaping in progress */
 	KMC_BIT_DESTROY		= 17,	/* Destroy in progress */
@@ -366,6 +367,7 @@ typedef enum kmem_cbrc {
 #define KMC_VMEM		(1 << KMC_BIT_VMEM)
 #define KMC_OFFSLAB		(1 << KMC_BIT_OFFSLAB)
 #define KMC_NOEMERGENCY		(1 << KMC_BIT_NOEMERGENCY)
+#define KMC_DEADLOCKED		(1 << KMC_BIT_DEADLOCKED)
 #define KMC_GROWING		(1 << KMC_BIT_GROWING)
 #define KMC_REAPING		(1 << KMC_BIT_REAPING)
 #define KMC_DESTROY		(1 << KMC_BIT_DESTROY)
@@ -473,6 +475,7 @@ typedef struct spl_kmem_cache {
 	uint64_t		skc_obj_total;	/* Obj total current */
 	uint64_t		skc_obj_alloc;	/* Obj alloc current */
 	uint64_t		skc_obj_max;	/* Obj max historic */
+	uint64_t		skc_obj_deadlock;  /* Obj emergency deadlocks */
 	uint64_t		skc_obj_emergency;  /* Obj emergency current */
 	uint64_t		skc_obj_emergency_max;  /* Obj emergency max */
 } spl_kmem_cache_t;
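
Note that the new flag follows the existing two-form convention in
this header: KMC_BIT_DEADLOCKED is a bit index for the kernel's
atomic bitops (test_bit(), set_bit(), clear_bit()), while
KMC_DEADLOCKED is the corresponding mask for plain flag-word tests.
A minimal sketch of each form, assuming skc is a spl_kmem_cache_t *:

	/* Atomic form: pass the bit index and the flags word address. */
	if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags))
		rc = spl_emergency_alloc(skc, flags, obj);

	/* Mask form: fine for a snapshot test of the flags word, e.g.
	 * while holding skc->skc_lock. */
	if (skc->skc_flags & KMC_DEADLOCKED)
		deadlocked = 1;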

module/spl/spl-kmem.c

@@ -1495,6 +1495,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 	skc->skc_obj_total = 0;
 	skc->skc_obj_alloc = 0;
 	skc->skc_obj_max = 0;
+	skc->skc_obj_deadlock = 0;
 	skc->skc_obj_emergency = 0;
 	skc->skc_obj_emergency_max = 0;
@@ -1662,6 +1663,7 @@ spl_cache_grow_work(void *data)
 	atomic_dec(&skc->skc_ref);
 	clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
+	clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
 	wake_up_all(&skc->skc_waitq);
 	spin_unlock(&skc->skc_lock);
@@ -1683,7 +1685,7 @@ spl_cache_grow_wait(spl_kmem_cache_t *skc)
 static int
 spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
 {
-	int remaining, rc = 0;
+	int remaining, rc;
 	SENTRY;

 	ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -1722,17 +1724,30 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
 	}

 	/*
-	 * Allow a single timer tick before falling back to synchronously
-	 * allocating the minimum about of memory required by the caller.
+	 * The goal here is to only detect the rare case where a virtual slab
+	 * allocation has deadlocked.  We must be careful to minimize the use
+	 * of emergency objects which are more expensive to track.  Therefore,
+	 * we set a very long timeout for the asynchronous allocation and if
+	 * the timeout is reached the cache is flagged as deadlocked.  From
+	 * this point only new emergency objects will be allocated until the
+	 * asynchronous allocation completes and clears the deadlocked flag.
 	 */
-	remaining = wait_event_timeout(skc->skc_waitq,
-	    spl_cache_grow_wait(skc), 1);
+	if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags)) {
+		rc = spl_emergency_alloc(skc, flags, obj);
+	} else {
+		remaining = wait_event_timeout(skc->skc_waitq,
+		    spl_cache_grow_wait(skc), HZ);

-	if (remaining == 0) {
-		if (test_bit(KMC_BIT_NOEMERGENCY, &skc->skc_flags))
-			rc = -ENOMEM;
-		else
-			rc = spl_emergency_alloc(skc, flags, obj);
+		if (!remaining && test_bit(KMC_BIT_VMEM, &skc->skc_flags)) {
+			spin_lock(&skc->skc_lock);
+			if (test_bit(KMC_BIT_GROWING, &skc->skc_flags)) {
+				set_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
+				skc->skc_obj_deadlock++;
+			}
+			spin_unlock(&skc->skc_lock);
+		}
+
+		rc = -ENOMEM;
 	}

 	SRETURN(rc);
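
For reference, wait_event_timeout() returns 0 when the timeout
expires with the condition still false, and the remaining jiffies
otherwise, so "!remaining" above means a full second (HZ jiffies)
passed without the grow completing.  Re-checking KMC_BIT_GROWING
under skc_lock then closes the race where the asynchronous
allocation finishes between the timeout and the lock acquisition;
only a grow that is genuinely still in flight marks the cache
deadlocked and bumps skc_obj_deadlock.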

module/spl/spl-proc.c

@@ -625,12 +625,14 @@ slab_seq_show_headers(struct seq_file *f)
 	    "--------------------- cache ----------"
 	    "--------------------------------------------- "
 	    "----- slab ------ "
-	    "---- object -----------------\n");
+	    "---- object ----- "
+	    "--- emergency ---\n");
 	seq_printf(f,
 	    "name "
 	    "  flags      size     alloc slabsize  objsize "
 	    "total alloc   max "
-	    "total alloc   max emerg   max\n");
+	    "total alloc   max "
+	    "dlock alloc   max\n");
 }

 static int
@@ -643,7 +645,7 @@ slab_seq_show(struct seq_file *f, void *p)
 	spin_lock(&skc->skc_lock);
 	seq_printf(f, "%-36s  ", skc->skc_name);
 	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u "
-	    "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
+	    "%5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu %5lu\n",
 	    (long unsigned)skc->skc_flags,
 	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
 	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
@@ -655,6 +657,7 @@ slab_seq_show(struct seq_file *f, void *p)
 	    (long unsigned)skc->skc_obj_total,
 	    (long unsigned)skc->skc_obj_alloc,
 	    (long unsigned)skc->skc_obj_max,
+	    (long unsigned)skc->skc_obj_deadlock,
 	    (long unsigned)skc->skc_obj_emergency,
 	    (long unsigned)skc->skc_obj_emergency_max);
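
With this change the /proc/spl/kmem/slab output gains an emergency
column group, where dlock reports the new skc_obj_deadlock counter,
i.e. how many times a deadlock has been detected for the cache,
rather than how many emergency objects currently exist.  An
illustrative row (hypothetical cache name, made-up numbers, spacing
approximate):

	name          flags     size  alloc slabsize objsize total alloc max total alloc max dlock alloc max
	zio_buf_8192  0x00042 131072  81920    32768    8192     4     3   6    16    10  20     2     1   3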