mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	kmem slab fixes
- Default SPL_KMEM_CACHE_DELAY changed to 15 to match Solaris. - Aged out slab checking occurs every SPL_KMEM_CACHE_DELAY / 3. - skc->skc_reap tunable added which allows callers of spl_slab_reclaim() to cap the number of slabs reclaimed. On Solaris all eligible slabs are always reclaimed, and this is still the default behavior. However, I suspect that is not always wise for reasons such as in the next comment. - spl_slab_reclaim() added cond_resched() while walking the slab/object free lists. Soft lockups were observed when freeing large numbers of vmalloc'd slabs/objects. - spl_slab_reclaim() 'sks->sks_ref > 0' check changes from incorrect 'break' to 'continue' to ensure all slabs are checked. - spl_cache_age() reworked to avoid a deadlock with do_flush_tlb_all() which occurred because we slept waiting for completion in spl_cache_age(). Waiting for magazine reclamation to finish is not required, so we no longer wait. - spl_magazine_create() and spl_magazine_destroy() shifted back to using for_each_online_cpu() instead of the spl_on_each_cpu() approach, which was of course a bad idea due to memory allocations, as Ricardo pointed out.
This commit is contained in:
		
							parent
							
								
									f500ccff35
								
							
						
					
					
						commit
						37db7d8cf9
					
				| @ -239,7 +239,8 @@ extern struct rw_semaphore spl_kmem_cache_sem; | ||||
| #define SKS_MAGIC			0x22222222 | ||||
| #define SKC_MAGIC			0x2c2c2c2c | ||||
| 
 | ||||
| #define SPL_KMEM_CACHE_DELAY		5	/* Minimum slab release age */ | ||||
| #define SPL_KMEM_CACHE_DELAY		15	/* Minimum slab release age */ | ||||
| #define SPL_KMEM_CACHE_REAP		0	/* Default reap everything */ | ||||
| #define SPL_KMEM_CACHE_OBJ_PER_SLAB	32	/* Target objects per slab */ | ||||
| #define SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN	8	/* Minimum objects per slab */ | ||||
| #define SPL_KMEM_CACHE_ALIGN		8	/* Default object alignment */ | ||||
| @ -292,6 +293,7 @@ typedef struct spl_kmem_cache { | ||||
| 	uint32_t		skc_slab_objs;	/* Objects per slab */ | ||||
| 	uint32_t		skc_slab_size;	/* Slab size */ | ||||
| 	uint32_t		skc_delay;	/* Slab reclaim interval */ | ||||
| 	uint32_t		skc_reap;	/* Slab reclaim count */ | ||||
| 	atomic_t		skc_ref;	/* Ref count callers */ | ||||
| 	struct delayed_work	skc_work;	/* Slab reclaim work */ | ||||
|         struct work_struct work; | ||||
|  | ||||
| @ -856,16 +856,19 @@ spl_slab_free(spl_kmem_slab_t *sks, | ||||
| /*
 | ||||
|  * Traverses all the partial slabs attached to a cache and free those | ||||
|  * which are currently empty, and have not been touched for | ||||
|  * skc_delay seconds.  This is to avoid thrashing. | ||||
|  * skc_delay seconds to  avoid thrashing.  The count argument is | ||||
|  * passed to optionally cap the number of slabs reclaimed, a count | ||||
|  * of zero means try and reclaim everything.  When flag is set we | ||||
|  * always free an available slab regardless of age. | ||||
|  */ | ||||
| static void | ||||
| spl_slab_reclaim(spl_kmem_cache_t *skc, int flag) | ||||
| spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag) | ||||
| { | ||||
| 	spl_kmem_slab_t *sks, *m; | ||||
| 	spl_kmem_obj_t *sko, *n; | ||||
| 	LIST_HEAD(sks_list); | ||||
| 	LIST_HEAD(sko_list); | ||||
| 	int size; | ||||
| 	int size, i = 0; | ||||
| 	ENTRY; | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -878,11 +881,18 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int flag) | ||||
| 	spin_lock(&skc->skc_lock); | ||||
|         list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list, | ||||
| 					 sks_list) { | ||||
| 		if (sks->sks_ref > 0) | ||||
| 		       break; | ||||
| 		/* Release at most count slabs */ | ||||
| 		if (count && i > count) | ||||
| 			break; | ||||
| 
 | ||||
| 		if (flag || time_after(jiffies,sks->sks_age+skc->skc_delay*HZ)) | ||||
| 		/* Skip active slabs */ | ||||
| 		if (sks->sks_ref > 0) | ||||
| 			continue; | ||||
| 
 | ||||
| 		if (time_after(jiffies,sks->sks_age+skc->skc_delay*HZ)||flag) { | ||||
| 			spl_slab_free(sks, &sks_list, &sko_list); | ||||
| 			i++; | ||||
| 		} | ||||
| 	} | ||||
| 	spin_unlock(&skc->skc_lock); | ||||
| 
 | ||||
| @ -896,12 +906,18 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int flag) | ||||
| 		size = P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) + | ||||
| 		       P2ROUNDUP(sizeof(spl_kmem_obj_t), skc->skc_obj_align); | ||||
| 
 | ||||
| 		list_for_each_entry_safe(sko, n, &sko_list, sko_list) | ||||
| 		/* To avoid soft lockups conditionally reschedule */ | ||||
| 		list_for_each_entry_safe(sko, n, &sko_list, sko_list) { | ||||
| 			kv_free(skc, sko->sko_addr, size); | ||||
| 			cond_resched(); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	list_for_each_entry_safe(sks, m, &sks_list, sks_list) | ||||
| 	/* To avoid soft lockups conditionally reschedule */ | ||||
| 	list_for_each_entry_safe(sks, m, &sks_list, sks_list) { | ||||
| 		kv_free(skc, sks, skc->skc_slab_size); | ||||
| 		cond_resched(); | ||||
| 	} | ||||
| 
 | ||||
| 	EXIT; | ||||
| } | ||||
| @ -937,11 +953,11 @@ spl_cache_age(void *data) | ||||
| 		spl_get_work_data(data, spl_kmem_cache_t, skc_work.work); | ||||
| 
 | ||||
| 	ASSERT(skc->skc_magic == SKC_MAGIC); | ||||
| 	spl_on_each_cpu(spl_magazine_age, skc, 1); | ||||
| 	spl_slab_reclaim(skc, 0); | ||||
| 	spl_slab_reclaim(skc, skc->skc_reap, 0); | ||||
| 	spl_on_each_cpu(spl_magazine_age, skc, 0); | ||||
| 
 | ||||
| 	if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)) | ||||
| 		schedule_delayed_work(&skc->skc_work, 2 * skc->skc_delay * HZ); | ||||
| 		schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -1057,49 +1073,47 @@ spl_magazine_free(spl_kmem_magazine_t *skm) | ||||
| 	EXIT; | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| __spl_magazine_create(void *data) | ||||
| { | ||||
|         spl_kmem_cache_t *skc = data; | ||||
| 	int id = smp_processor_id(); | ||||
| 
 | ||||
| 	skc->skc_mag[id] = spl_magazine_alloc(skc, cpu_to_node(id)); | ||||
| 	ASSERT(skc->skc_mag[id]); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Create all per-cpu magazines of reasonable sizes. | ||||
|  */ | ||||
| static int | ||||
| spl_magazine_create(spl_kmem_cache_t *skc) | ||||
| { | ||||
| 	int i; | ||||
| 	ENTRY; | ||||
| 
 | ||||
| 	skc->skc_mag_size = spl_magazine_size(skc); | ||||
| 	skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2; | ||||
| 	spl_on_each_cpu(__spl_magazine_create, skc, 1); | ||||
| 
 | ||||
| 	for_each_online_cpu(i) { | ||||
| 		skc->skc_mag[i] = spl_magazine_alloc(skc, cpu_to_node(i)); | ||||
| 		if (!skc->skc_mag[i]) { | ||||
| 			for (i--; i >= 0; i--) | ||||
| 				spl_magazine_free(skc->skc_mag[i]); | ||||
| 
 | ||||
| 			RETURN(-ENOMEM); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	RETURN(0); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| __spl_magazine_destroy(void *data) | ||||
| { | ||||
|         spl_kmem_cache_t *skc = data; | ||||
| 	spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()]; | ||||
| 
 | ||||
| 	(void)spl_cache_flush(skc, skm, skm->skm_avail); | ||||
| 	spl_magazine_free(skm); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Destroy all per-cpu magazines. | ||||
|  */ | ||||
| static void | ||||
| spl_magazine_destroy(spl_kmem_cache_t *skc) | ||||
| { | ||||
| 	spl_kmem_magazine_t *skm; | ||||
| 	int i; | ||||
| 	ENTRY; | ||||
| 	spl_on_each_cpu(__spl_magazine_destroy, skc, 1); | ||||
| 
 | ||||
|         for_each_online_cpu(i) { | ||||
| 		skm = skc->skc_mag[i]; | ||||
| 		(void)spl_cache_flush(skc, skm, skm->skm_avail); | ||||
| 		spl_magazine_free(skm); | ||||
|         } | ||||
| 
 | ||||
| 	EXIT; | ||||
| } | ||||
| 
 | ||||
| @ -1168,6 +1182,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, | ||||
| 	skc->skc_obj_size = size; | ||||
| 	skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN; | ||||
| 	skc->skc_delay = SPL_KMEM_CACHE_DELAY; | ||||
| 	skc->skc_reap = SPL_KMEM_CACHE_REAP; | ||||
| 	atomic_set(&skc->skc_ref, 0); | ||||
| 
 | ||||
| 	INIT_LIST_HEAD(&skc->skc_list); | ||||
| @ -1209,7 +1224,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, | ||||
| 		GOTO(out, rc); | ||||
| 
 | ||||
| 	spl_init_delayed_work(&skc->skc_work, spl_cache_age, skc); | ||||
| 	schedule_delayed_work(&skc->skc_work, 2 * skc->skc_delay * HZ); | ||||
| 	schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ); | ||||
| 
 | ||||
| 	down_write(&spl_kmem_cache_sem); | ||||
| 	list_add_tail(&skc->skc_list, &spl_kmem_cache_list); | ||||
| @ -1249,7 +1264,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc) | ||||
| 	wait_event(wq, atomic_read(&skc->skc_ref) == 0); | ||||
| 
 | ||||
| 	spl_magazine_destroy(skc); | ||||
| 	spl_slab_reclaim(skc, 1); | ||||
| 	spl_slab_reclaim(skc, 0, 1); | ||||
| 	spin_lock(&skc->skc_lock); | ||||
| 
 | ||||
| 	/* Validate there are no objects in use and free all the
 | ||||
| @ -1654,7 +1669,7 @@ spl_kmem_cache_reap_now(spl_kmem_cache_t *skc) | ||||
| 	if (skc->skc_reclaim) | ||||
| 		skc->skc_reclaim(skc->skc_private); | ||||
| 
 | ||||
| 	spl_slab_reclaim(skc, 0); | ||||
| 	spl_slab_reclaim(skc, skc->skc_reap, 0); | ||||
| 	clear_bit(KMC_BIT_REAPING, &skc->skc_flags); | ||||
| 	atomic_dec(&skc->skc_ref); | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Brian Behlendorf
						Brian Behlendorf