mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Track emergency object in rbtree
In the initial implementation emergency objects were tracked on a per-cache list. The assumption was that under normal operation we would never allocate more than a handful of these objects. So the cost of walking the list during free was expected to be negligible. However real world usage has shown that emergency objects tend to be allocated in batches. A deadlock will be detected and several thousand emergency objects will be allocated before the original blocked slab allocation can complete. Therefore the original list has been replaced by a red black tree which is sorted by the memory address of each allocated object. This bounds the worst case insertion and removal time to O(log n) which minimize contention on the assoicated spin lock. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
+72
-26
@@ -1116,8 +1116,54 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag)
|
||||
SEXIT;
|
||||
}
|
||||
|
||||
static spl_kmem_emergency_t *
|
||||
spl_emergency_search(struct rb_root *root, void *obj)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
spl_kmem_emergency_t *ske;
|
||||
unsigned long address = (unsigned long)obj;
|
||||
|
||||
while (node) {
|
||||
ske = container_of(node, spl_kmem_emergency_t, ske_node);
|
||||
|
||||
if (address < (unsigned long)ske->ske_obj)
|
||||
node = node->rb_left;
|
||||
else if (address > (unsigned long)ske->ske_obj)
|
||||
node = node->rb_right;
|
||||
else
|
||||
return ske;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
spl_emergency_insert(struct rb_root *root, spl_kmem_emergency_t *ske)
|
||||
{
|
||||
struct rb_node **new = &(root->rb_node), *parent = NULL;
|
||||
spl_kmem_emergency_t *ske_tmp;
|
||||
unsigned long address = (unsigned long)ske->ske_obj;
|
||||
|
||||
while (*new) {
|
||||
ske_tmp = container_of(*new, spl_kmem_emergency_t, ske_node);
|
||||
|
||||
parent = *new;
|
||||
if (address < (unsigned long)ske_tmp->ske_obj)
|
||||
new = &((*new)->rb_left);
|
||||
else if (address > (unsigned long)ske_tmp->ske_obj)
|
||||
new = &((*new)->rb_right);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
rb_link_node(&ske->ske_node, parent, new);
|
||||
rb_insert_color(&ske->ske_node, root);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a single emergency object for use by the caller.
|
||||
* Allocate a single emergency object and track it in a red black tree.
|
||||
*/
|
||||
static int
|
||||
spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
|
||||
@@ -1143,48 +1189,49 @@ spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
|
||||
SRETURN(-ENOMEM);
|
||||
}
|
||||
|
||||
spin_lock(&skc->skc_lock);
|
||||
empty = spl_emergency_insert(&skc->skc_emergency_tree, ske);
|
||||
if (likely(empty)) {
|
||||
skc->skc_obj_total++;
|
||||
skc->skc_obj_emergency++;
|
||||
if (skc->skc_obj_emergency > skc->skc_obj_emergency_max)
|
||||
skc->skc_obj_emergency_max = skc->skc_obj_emergency;
|
||||
}
|
||||
spin_unlock(&skc->skc_lock);
|
||||
|
||||
if (unlikely(!empty)) {
|
||||
kfree(ske->ske_obj);
|
||||
kfree(ske);
|
||||
SRETURN(-EINVAL);
|
||||
}
|
||||
|
||||
if (skc->skc_ctor)
|
||||
skc->skc_ctor(ske->ske_obj, skc->skc_private, flags);
|
||||
|
||||
spin_lock(&skc->skc_lock);
|
||||
skc->skc_obj_total++;
|
||||
skc->skc_obj_emergency++;
|
||||
if (skc->skc_obj_emergency > skc->skc_obj_emergency_max)
|
||||
skc->skc_obj_emergency_max = skc->skc_obj_emergency;
|
||||
|
||||
list_add(&ske->ske_list, &skc->skc_emergency_list);
|
||||
spin_unlock(&skc->skc_lock);
|
||||
|
||||
*obj = ske->ske_obj;
|
||||
|
||||
SRETURN(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free the passed object if it is an emergency object or a normal slab
|
||||
* object. Currently this is done by walking what should be a short list of
|
||||
* emergency objects. If this proves to be too inefficient we can replace
|
||||
* the simple list with a hash.
|
||||
* Locate the passed object in the red black tree and free it.
|
||||
*/
|
||||
static int
|
||||
spl_emergency_free(spl_kmem_cache_t *skc, void *obj)
|
||||
{
|
||||
spl_kmem_emergency_t *m, *n, *ske = NULL;
|
||||
spl_kmem_emergency_t *ske;
|
||||
SENTRY;
|
||||
|
||||
spin_lock(&skc->skc_lock);
|
||||
list_for_each_entry_safe(m, n, &skc->skc_emergency_list, ske_list) {
|
||||
if (m->ske_obj == obj) {
|
||||
list_del(&m->ske_list);
|
||||
skc->skc_obj_emergency--;
|
||||
skc->skc_obj_total--;
|
||||
ske = m;
|
||||
break;
|
||||
}
|
||||
ske = spl_emergency_search(&skc->skc_emergency_tree, obj);
|
||||
if (likely(ske)) {
|
||||
rb_erase(&ske->ske_node, &skc->skc_emergency_tree);
|
||||
skc->skc_obj_emergency--;
|
||||
skc->skc_obj_total--;
|
||||
}
|
||||
spin_unlock(&skc->skc_lock);
|
||||
|
||||
if (ske == NULL)
|
||||
if (unlikely(ske == NULL))
|
||||
SRETURN(-ENOENT);
|
||||
|
||||
if (skc->skc_dtor)
|
||||
@@ -1483,7 +1530,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
|
||||
INIT_LIST_HEAD(&skc->skc_list);
|
||||
INIT_LIST_HEAD(&skc->skc_complete_list);
|
||||
INIT_LIST_HEAD(&skc->skc_partial_list);
|
||||
INIT_LIST_HEAD(&skc->skc_emergency_list);
|
||||
skc->skc_emergency_tree = RB_ROOT;
|
||||
spin_lock_init(&skc->skc_lock);
|
||||
init_waitqueue_head(&skc->skc_waitq);
|
||||
skc->skc_slab_fail = 0;
|
||||
@@ -1590,7 +1637,6 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
|
||||
ASSERT3U(skc->skc_obj_total, ==, 0);
|
||||
ASSERT3U(skc->skc_obj_emergency, ==, 0);
|
||||
ASSERT(list_empty(&skc->skc_complete_list));
|
||||
ASSERT(list_empty(&skc->skc_emergency_list));
|
||||
|
||||
kmem_free(skc->skc_name, skc->skc_name_size);
|
||||
spin_unlock(&skc->skc_lock);
|
||||
|
||||
Reference in New Issue
Block a user