mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-14 12:10:26 +03:00
kmem slab fixes
- Default SPL_KMEM_CACHE_DELAY changed to 15 to match Solaris. - Aged out slab checking occurs every SPL_KMEM_CACHE_DELAY / 3. - skc->skc_reap tunable added which allows callers of spl_slab_reclaim() to cap the number of slabs reclaimed. On Solaris all eligible slabs are always reclaimed, and this is still the default behavior. However, I suspect that is not always wise for reasons such as in the next comment. - spl_slab_reclaim() now calls cond_resched() while walking the slab/object free lists. Soft lockups were observed when freeing large numbers of vmalloc'd slabs/objects. - spl_slab_reclaim() 'sks->sks_ref > 0' check changed from an incorrect 'break' to 'continue' to ensure all slabs are checked. - spl_cache_age() reworked to avoid a deadlock with do_flush_tlb_all() which occurred because we slept waiting for completion in spl_cache_age(). Waiting for magazine reclamation to finish is not required, so we no longer wait. - spl_magazine_create() and spl_magazine_destroy() shifted back to using for_each_online_cpu() instead of the spl_on_each_cpu() approach, which was of course a bad idea due to memory allocations, as Ricardo pointed out.
This commit is contained in:
parent
f500ccff35
commit
37db7d8cf9
@ -239,7 +239,8 @@ extern struct rw_semaphore spl_kmem_cache_sem;
|
|||||||
#define SKS_MAGIC 0x22222222
|
#define SKS_MAGIC 0x22222222
|
||||||
#define SKC_MAGIC 0x2c2c2c2c
|
#define SKC_MAGIC 0x2c2c2c2c
|
||||||
|
|
||||||
#define SPL_KMEM_CACHE_DELAY 5 /* Minimum slab release age */
|
#define SPL_KMEM_CACHE_DELAY 15 /* Minimum slab release age */
|
||||||
|
#define SPL_KMEM_CACHE_REAP 0 /* Default reap everything */
|
||||||
#define SPL_KMEM_CACHE_OBJ_PER_SLAB 32 /* Target objects per slab */
|
#define SPL_KMEM_CACHE_OBJ_PER_SLAB 32 /* Target objects per slab */
|
||||||
#define SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN 8 /* Minimum objects per slab */
|
#define SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN 8 /* Minimum objects per slab */
|
||||||
#define SPL_KMEM_CACHE_ALIGN 8 /* Default object alignment */
|
#define SPL_KMEM_CACHE_ALIGN 8 /* Default object alignment */
|
||||||
@ -292,6 +293,7 @@ typedef struct spl_kmem_cache {
|
|||||||
uint32_t skc_slab_objs; /* Objects per slab */
|
uint32_t skc_slab_objs; /* Objects per slab */
|
||||||
uint32_t skc_slab_size; /* Slab size */
|
uint32_t skc_slab_size; /* Slab size */
|
||||||
uint32_t skc_delay; /* Slab reclaim interval */
|
uint32_t skc_delay; /* Slab reclaim interval */
|
||||||
|
uint32_t skc_reap; /* Slab reclaim count */
|
||||||
atomic_t skc_ref; /* Ref count callers */
|
atomic_t skc_ref; /* Ref count callers */
|
||||||
struct delayed_work skc_work; /* Slab reclaim work */
|
struct delayed_work skc_work; /* Slab reclaim work */
|
||||||
struct work_struct work;
|
struct work_struct work;
|
||||||
|
@ -856,16 +856,19 @@ spl_slab_free(spl_kmem_slab_t *sks,
|
|||||||
/*
|
/*
|
||||||
* Traverses all the partial slabs attached to a cache and free those
|
* Traverses all the partial slabs attached to a cache and free those
|
||||||
* which which are currently empty, and have not been touched for
|
* which which are currently empty, and have not been touched for
|
||||||
* skc_delay seconds. This is to avoid thrashing.
|
* skc_delay seconds to avoid thrashing. The count argument is
|
||||||
|
* passed to optionally cap the number of slabs reclaimed, a count
|
||||||
|
* of zero means try and reclaim everything. When flag is set we
|
||||||
|
* always free an available slab regardless of age.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
spl_slab_reclaim(spl_kmem_cache_t *skc, int flag)
|
spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag)
|
||||||
{
|
{
|
||||||
spl_kmem_slab_t *sks, *m;
|
spl_kmem_slab_t *sks, *m;
|
||||||
spl_kmem_obj_t *sko, *n;
|
spl_kmem_obj_t *sko, *n;
|
||||||
LIST_HEAD(sks_list);
|
LIST_HEAD(sks_list);
|
||||||
LIST_HEAD(sko_list);
|
LIST_HEAD(sko_list);
|
||||||
int size;
|
int size, i = 0;
|
||||||
ENTRY;
|
ENTRY;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -878,11 +881,18 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int flag)
|
|||||||
spin_lock(&skc->skc_lock);
|
spin_lock(&skc->skc_lock);
|
||||||
list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list,
|
list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list,
|
||||||
sks_list) {
|
sks_list) {
|
||||||
if (sks->sks_ref > 0)
|
/* Release at most count slabs */
|
||||||
break;
|
if (count && i > count)
|
||||||
|
break;
|
||||||
|
|
||||||
if (flag || time_after(jiffies,sks->sks_age+skc->skc_delay*HZ))
|
/* Skip active slabs */
|
||||||
|
if (sks->sks_ref > 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (time_after(jiffies,sks->sks_age+skc->skc_delay*HZ)||flag) {
|
||||||
spl_slab_free(sks, &sks_list, &sko_list);
|
spl_slab_free(sks, &sks_list, &sko_list);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
spin_unlock(&skc->skc_lock);
|
spin_unlock(&skc->skc_lock);
|
||||||
|
|
||||||
@ -896,12 +906,18 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int flag)
|
|||||||
size = P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) +
|
size = P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) +
|
||||||
P2ROUNDUP(sizeof(spl_kmem_obj_t), skc->skc_obj_align);
|
P2ROUNDUP(sizeof(spl_kmem_obj_t), skc->skc_obj_align);
|
||||||
|
|
||||||
list_for_each_entry_safe(sko, n, &sko_list, sko_list)
|
/* To avoid soft lockups conditionally reschedule */
|
||||||
|
list_for_each_entry_safe(sko, n, &sko_list, sko_list) {
|
||||||
kv_free(skc, sko->sko_addr, size);
|
kv_free(skc, sko->sko_addr, size);
|
||||||
|
cond_resched();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
list_for_each_entry_safe(sks, m, &sks_list, sks_list)
|
/* To avoid soft lockups conditionally reschedule */
|
||||||
|
list_for_each_entry_safe(sks, m, &sks_list, sks_list) {
|
||||||
kv_free(skc, sks, skc->skc_slab_size);
|
kv_free(skc, sks, skc->skc_slab_size);
|
||||||
|
cond_resched();
|
||||||
|
}
|
||||||
|
|
||||||
EXIT;
|
EXIT;
|
||||||
}
|
}
|
||||||
@ -937,11 +953,11 @@ spl_cache_age(void *data)
|
|||||||
spl_get_work_data(data, spl_kmem_cache_t, skc_work.work);
|
spl_get_work_data(data, spl_kmem_cache_t, skc_work.work);
|
||||||
|
|
||||||
ASSERT(skc->skc_magic == SKC_MAGIC);
|
ASSERT(skc->skc_magic == SKC_MAGIC);
|
||||||
spl_on_each_cpu(spl_magazine_age, skc, 1);
|
spl_slab_reclaim(skc, skc->skc_reap, 0);
|
||||||
spl_slab_reclaim(skc, 0);
|
spl_on_each_cpu(spl_magazine_age, skc, 0);
|
||||||
|
|
||||||
if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
|
if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
|
||||||
schedule_delayed_work(&skc->skc_work, 2 * skc->skc_delay * HZ);
|
schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1057,49 +1073,47 @@ spl_magazine_free(spl_kmem_magazine_t *skm)
|
|||||||
EXIT;
|
EXIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
__spl_magazine_create(void *data)
|
|
||||||
{
|
|
||||||
spl_kmem_cache_t *skc = data;
|
|
||||||
int id = smp_processor_id();
|
|
||||||
|
|
||||||
skc->skc_mag[id] = spl_magazine_alloc(skc, cpu_to_node(id));
|
|
||||||
ASSERT(skc->skc_mag[id]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create all pre-cpu magazines of reasonable sizes.
|
* Create all pre-cpu magazines of reasonable sizes.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
spl_magazine_create(spl_kmem_cache_t *skc)
|
spl_magazine_create(spl_kmem_cache_t *skc)
|
||||||
{
|
{
|
||||||
|
int i;
|
||||||
ENTRY;
|
ENTRY;
|
||||||
|
|
||||||
skc->skc_mag_size = spl_magazine_size(skc);
|
skc->skc_mag_size = spl_magazine_size(skc);
|
||||||
skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
|
skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
|
||||||
spl_on_each_cpu(__spl_magazine_create, skc, 1);
|
|
||||||
|
for_each_online_cpu(i) {
|
||||||
|
skc->skc_mag[i] = spl_magazine_alloc(skc, cpu_to_node(i));
|
||||||
|
if (!skc->skc_mag[i]) {
|
||||||
|
for (i--; i >= 0; i--)
|
||||||
|
spl_magazine_free(skc->skc_mag[i]);
|
||||||
|
|
||||||
|
RETURN(-ENOMEM);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
RETURN(0);
|
RETURN(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
__spl_magazine_destroy(void *data)
|
|
||||||
{
|
|
||||||
spl_kmem_cache_t *skc = data;
|
|
||||||
spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
|
|
||||||
|
|
||||||
(void)spl_cache_flush(skc, skm, skm->skm_avail);
|
|
||||||
spl_magazine_free(skm);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Destroy all pre-cpu magazines.
|
* Destroy all pre-cpu magazines.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
spl_magazine_destroy(spl_kmem_cache_t *skc)
|
spl_magazine_destroy(spl_kmem_cache_t *skc)
|
||||||
{
|
{
|
||||||
|
spl_kmem_magazine_t *skm;
|
||||||
|
int i;
|
||||||
ENTRY;
|
ENTRY;
|
||||||
spl_on_each_cpu(__spl_magazine_destroy, skc, 1);
|
|
||||||
|
for_each_online_cpu(i) {
|
||||||
|
skm = skc->skc_mag[i];
|
||||||
|
(void)spl_cache_flush(skc, skm, skm->skm_avail);
|
||||||
|
spl_magazine_free(skm);
|
||||||
|
}
|
||||||
|
|
||||||
EXIT;
|
EXIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1168,6 +1182,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
|
|||||||
skc->skc_obj_size = size;
|
skc->skc_obj_size = size;
|
||||||
skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
|
skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
|
||||||
skc->skc_delay = SPL_KMEM_CACHE_DELAY;
|
skc->skc_delay = SPL_KMEM_CACHE_DELAY;
|
||||||
|
skc->skc_reap = SPL_KMEM_CACHE_REAP;
|
||||||
atomic_set(&skc->skc_ref, 0);
|
atomic_set(&skc->skc_ref, 0);
|
||||||
|
|
||||||
INIT_LIST_HEAD(&skc->skc_list);
|
INIT_LIST_HEAD(&skc->skc_list);
|
||||||
@ -1209,7 +1224,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
|
|||||||
GOTO(out, rc);
|
GOTO(out, rc);
|
||||||
|
|
||||||
spl_init_delayed_work(&skc->skc_work, spl_cache_age, skc);
|
spl_init_delayed_work(&skc->skc_work, spl_cache_age, skc);
|
||||||
schedule_delayed_work(&skc->skc_work, 2 * skc->skc_delay * HZ);
|
schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
|
||||||
|
|
||||||
down_write(&spl_kmem_cache_sem);
|
down_write(&spl_kmem_cache_sem);
|
||||||
list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
|
list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
|
||||||
@ -1249,7 +1264,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
|
|||||||
wait_event(wq, atomic_read(&skc->skc_ref) == 0);
|
wait_event(wq, atomic_read(&skc->skc_ref) == 0);
|
||||||
|
|
||||||
spl_magazine_destroy(skc);
|
spl_magazine_destroy(skc);
|
||||||
spl_slab_reclaim(skc, 1);
|
spl_slab_reclaim(skc, 0, 1);
|
||||||
spin_lock(&skc->skc_lock);
|
spin_lock(&skc->skc_lock);
|
||||||
|
|
||||||
/* Validate there are no objects in use and free all the
|
/* Validate there are no objects in use and free all the
|
||||||
@ -1654,7 +1669,7 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
|
|||||||
if (skc->skc_reclaim)
|
if (skc->skc_reclaim)
|
||||||
skc->skc_reclaim(skc->skc_private);
|
skc->skc_reclaim(skc->skc_private);
|
||||||
|
|
||||||
spl_slab_reclaim(skc, 0);
|
spl_slab_reclaim(skc, skc->skc_reap, 0);
|
||||||
clear_bit(KMC_BIT_REAPING, &skc->skc_flags);
|
clear_bit(KMC_BIT_REAPING, &skc->skc_flags);
|
||||||
atomic_dec(&skc->skc_ref);
|
atomic_dec(&skc->skc_ref);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user