diff --git a/include/sys/kmem.h b/include/sys/kmem.h
index 83adc8d2a..e189922ef 100644
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include <sys/taskq.h>
 
 /*
  * Memory allocation interfaces
@@ -406,7 +407,6 @@ typedef struct spl_kmem_magazine {
         uint32_t                skm_size;       /* Magazine size */
         uint32_t                skm_refill;     /* Batch refill size */
         struct spl_kmem_cache   *skm_cache;     /* Owned by cache */
-        struct delayed_work     skm_work;       /* Magazine reclaim work */
         unsigned long           skm_age;        /* Last cache access */
         unsigned int            skm_cpu;        /* Owned by cpu */
         void                    *skm_objs[0];   /* Object pointers */
@@ -460,7 +460,7 @@ typedef struct spl_kmem_cache {
         uint32_t                skc_delay;      /* Slab reclaim interval */
         uint32_t                skc_reap;       /* Slab reclaim count */
         atomic_t                skc_ref;        /* Ref count callers */
-        struct delayed_work     skc_work;       /* Slab reclaim work */
+        taskqid_t               skc_taskqid;    /* Slab reclaim task */
         struct list_head        skc_list;       /* List of caches linkage */
         struct list_head        skc_complete_list;/* Completely alloc'ed */
         struct list_head        skc_partial_list; /* Partially alloc'ed */
diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c
index f78f820aa..3900c9cf0 100644
--- a/module/spl/spl-kmem.c
+++ b/module/spl/spl-kmem.c
@@ -825,6 +825,7 @@ EXPORT_SYMBOL(vmem_free_debug);
 
 struct list_head spl_kmem_cache_list;   /* List of caches */
 struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
+taskq_t *spl_kmem_cache_taskq;          /* Task queue for ageing / reclaim */
 
 static int spl_cache_flush(spl_kmem_cache_t *skc,
                            spl_kmem_magazine_t *skm, int flush);
@@ -1243,50 +1244,59 @@ spl_emergency_free(spl_kmem_cache_t *skc, void *obj)
         SRETURN(0);
 }
 
-/*
- * Called regularly on all caches to age objects out of the magazines
- * which have not been access in skc->skc_delay seconds.  This prevents
- * idle magazines from holding memory which might be better used by
- * other caches or parts of the system.  The delay is present to
- * prevent thrashing the magazine.
- */
 static void
 spl_magazine_age(void *data)
 {
-        spl_kmem_magazine_t *skm =
-            spl_get_work_data(data, spl_kmem_magazine_t, skm_work.work);
-        spl_kmem_cache_t *skc = skm->skm_cache;
+        spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
+        spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
 
         ASSERT(skm->skm_magic == SKM_MAGIC);
-        ASSERT(skc->skc_magic == SKC_MAGIC);
-        ASSERT(skc->skc_mag[skm->skm_cpu] == skm);
+        ASSERT(skm->skm_cpu == smp_processor_id());
 
-        if (skm->skm_avail > 0 &&
-            time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
-                (void)spl_cache_flush(skc, skm, skm->skm_refill);
-
-        if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
-                schedule_delayed_work_on(skm->skm_cpu, &skm->skm_work,
-                    skc->skc_delay / 3 * HZ);
+        if (skm->skm_avail > 0)
+                if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
+                        (void) spl_cache_flush(skc, skm, skm->skm_refill);
 }
 
 /*
- * Called regularly to keep a downward pressure on the size of idle
- * magazines and to release free slabs from the cache.  This function
- * never calls the registered reclaim function, that only occurs
- * under memory pressure or with a direct call to spl_kmem_reap().
+ * Called regularly to keep a downward pressure on the cache.
+ *
+ * Objects older than skc->skc_delay seconds in the per-cpu magazines will
+ * be returned to the caches.  This is done to prevent idle magazines from
+ * holding memory which could be better used elsewhere.  The delay is
+ * present to prevent thrashing the magazine.
+ *
+ * The newly released objects may result in empty partial slabs.  Those
+ * slabs should be released to the system.  Otherwise moving the objects
+ * out of the magazines is just wasted work.
  */
 static void
 spl_cache_age(void *data)
 {
-        spl_kmem_cache_t *skc =
-            spl_get_work_data(data, spl_kmem_cache_t, skc_work.work);
+        spl_kmem_cache_t *skc = (spl_kmem_cache_t *)data;
+        taskqid_t id = 0;
 
         ASSERT(skc->skc_magic == SKC_MAGIC);
+
+        atomic_inc(&skc->skc_ref);
+        spl_on_each_cpu(spl_magazine_age, skc, 1);
         spl_slab_reclaim(skc, skc->skc_reap, 0);
 
-        if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
-                schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
+        while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {
+                id = taskq_dispatch_delay(
+                    spl_kmem_cache_taskq, spl_cache_age, skc, TQ_SLEEP,
+                    ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
+
+                /* Destroy issued after dispatch immediately cancel it */
+                if (test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && id)
+                        taskq_cancel_id(spl_kmem_cache_taskq, id);
+        }
+
+        spin_lock(&skc->skc_lock);
+        skc->skc_taskqid = id;
+        spin_unlock(&skc->skc_lock);
+
+        atomic_dec(&skc->skc_ref);
 }
 
 /*
@@ -1380,7 +1390,6 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
                 skm->skm_size = skc->skc_mag_size;
                 skm->skm_refill = skc->skc_mag_refill;
                 skm->skm_cache = skc;
-                spl_init_delayed_work(&skm->skm_work, spl_magazine_age, skm);
                 skm->skm_age = jiffies;
                 skm->skm_cpu = cpu;
         }
@@ -1427,11 +1436,6 @@ spl_magazine_create(spl_kmem_cache_t *skc)
                 }
         }
 
-        /* Only after everything is allocated schedule magazine work */
-        for_each_online_cpu(i)
-                schedule_delayed_work_on(i, &skc->skc_mag[i]->skm_work,
-                    skc->skc_delay / 3 * HZ);
-
         SRETURN(0);
 }
 
@@ -1566,8 +1570,9 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
         if (rc)
                 SGOTO(out, rc);
 
-        spl_init_delayed_work(&skc->skc_work, spl_cache_age, skc);
-        schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
+        skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
+            spl_cache_age, skc, TQ_SLEEP,
+            ddi_get_lbolt() + skc->skc_delay / 3 * HZ);
 
         down_write(&spl_kmem_cache_sem);
         list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
@@ -1600,7 +1605,7 @@ void
 spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 {
         DECLARE_WAIT_QUEUE_HEAD(wq);
-        int i;
+        taskqid_t id;
         SENTRY;
 
         ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -1609,13 +1614,14 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
         list_del_init(&skc->skc_list);
         up_write(&spl_kmem_cache_sem);
 
-        /* Cancel any and wait for any pending delayed work */
+        /* Cancel any and wait for any pending delayed tasks */
         VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
-        cancel_delayed_work_sync(&skc->skc_work);
-        for_each_online_cpu(i)
-                cancel_delayed_work_sync(&skc->skc_mag[i]->skm_work);
-        flush_scheduled_work();
+        spin_lock(&skc->skc_lock);
+        id = skc->skc_taskqid;
+        spin_unlock(&skc->skc_lock);
+
+        taskq_cancel_id(spl_kmem_cache_taskq, id);
 
         /* Wait until all current callers complete, this is mainly
          * to catch the case where a low memory situation triggers a
@@ -2394,6 +2400,8 @@ spl_kmem_init(void)
 
         init_rwsem(&spl_kmem_cache_sem);
         INIT_LIST_HEAD(&spl_kmem_cache_list);
+        spl_kmem_cache_taskq = taskq_create("spl_kmem_cache",
+            1, maxclsyspri, 1, 32, TASKQ_PREPOPULATE);
 
         spl_register_shrinker(&spl_kmem_cache_shrinker);
 
@@ -2432,6 +2440,7 @@ spl_kmem_fini(void)
         SENTRY;
 
         spl_unregister_shrinker(&spl_kmem_cache_shrinker);
+        taskq_destroy(spl_kmem_cache_taskq);
 
         SEXIT;
 }
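
For reference, here is a minimal standalone sketch of the self-re-dispatching pattern spl_cache_age() adopts above. It is not part of the patch; it assumes only the SPL taskq primitives the patch itself uses (taskq_dispatch_delay, taskq_cancel_id, ddi_get_lbolt, TQ_SLEEP), and the my_* identifiers, the MY_AGE_DELAY interval, and the simplified cache struct are hypothetical stand-ins.

/*
 * Illustrative sketch only, not part of the patch.  Shows how a task can
 * periodically re-dispatch itself on a taskq and still be cancelled
 * cleanly when a destroy flag is raised, mirroring spl_cache_age().
 * The my_* names are hypothetical; the taskq calls and ddi_get_lbolt()
 * come from the SPL headers, e.g. <sys/taskq.h> added by this patch.
 */
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <sys/taskq.h>

#define MY_BIT_DESTROY  0                /* Set when the cache is going away */
#define MY_AGE_DELAY    (15 * HZ)        /* Hypothetical re-dispatch interval */

typedef struct my_cache {
        spinlock_t      mc_lock;         /* Protects mc_taskqid */
        taskqid_t       mc_taskqid;      /* Outstanding ageing task, or 0 */
        unsigned long   mc_flags;        /* MY_BIT_DESTROY lives here */
} my_cache_t;

static taskq_t *my_taskq;                /* Created once, e.g. at module init */

static void
my_cache_age(void *data)
{
        my_cache_t *mc = (my_cache_t *)data;
        taskqid_t id = 0;

        /* ... per-cpu magazine ageing and slab reclaim would go here ... */

        /* Re-dispatch ourselves unless a destroy has been issued. */
        while (!test_bit(MY_BIT_DESTROY, &mc->mc_flags) && !id) {
                id = taskq_dispatch_delay(my_taskq, my_cache_age, mc,
                    TQ_SLEEP, ddi_get_lbolt() + MY_AGE_DELAY);

                /* A destroy may race the dispatch; cancel the new task. */
                if (test_bit(MY_BIT_DESTROY, &mc->mc_flags) && id)
                        taskq_cancel_id(my_taskq, id);
        }

        /* Publish the id so a destroyer can taskq_cancel_id() it. */
        spin_lock(&mc->mc_lock);
        mc->mc_taskqid = id;
        spin_unlock(&mc->mc_lock);
}

The loop mirrors the patch's handling of a dispatch that returns an invalid (zero) task id, and the post-dispatch destroy check closes the window where a destroy sets the flag between the loop test and the dispatch, the same race spl_kmem_cache_destroy() guards against above.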