mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Revert "Reduce dbuf_find() lock contention"
This reverts commit 34dbc618f5.  While this
change resolved the lock contention observed for certain workloads, it
inadvertently reduced the maximum hash inserts/removes per second.  This
appears to be due to the slightly higher acquisition cost of a rwlock vs
a mutex.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
			
			
This commit is contained in:
		
							parent
							
								
									b66f8d3c2b
								
							
						
					
					
						commit
						91e02156dd
					
				| @ -321,12 +321,13 @@ typedef struct dmu_buf_impl { | |||||||
| 	uint8_t db_dirtycnt; | 	uint8_t db_dirtycnt; | ||||||
| } dmu_buf_impl_t; | } dmu_buf_impl_t; | ||||||
| 
 | 
 | ||||||
| #define	DBUF_RWLOCKS 8192 | /* Note: the dbuf hash table is exposed only for the mdb module */ | ||||||
| #define	DBUF_HASH_RWLOCK(h, idx) (&(h)->hash_rwlocks[(idx) & (DBUF_RWLOCKS-1)]) | #define	DBUF_MUTEXES 2048 | ||||||
|  | #define	DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)]) | ||||||
| typedef struct dbuf_hash_table { | typedef struct dbuf_hash_table { | ||||||
| 	uint64_t hash_table_mask; | 	uint64_t hash_table_mask; | ||||||
| 	dmu_buf_impl_t **hash_table; | 	dmu_buf_impl_t **hash_table; | ||||||
| 	krwlock_t hash_rwlocks[DBUF_RWLOCKS] ____cacheline_aligned; | 	kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned; | ||||||
| } dbuf_hash_table_t; | } dbuf_hash_table_t; | ||||||
| 
 | 
 | ||||||
| typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t); | typedef void (*dbuf_prefetch_fn)(void *, uint64_t, uint64_t, boolean_t); | ||||||
|  | |||||||
| @ -339,18 +339,18 @@ dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid) | |||||||
| 	hv = dbuf_hash(os, obj, level, blkid); | 	hv = dbuf_hash(os, obj, level, blkid); | ||||||
| 	idx = hv & h->hash_table_mask; | 	idx = hv & h->hash_table_mask; | ||||||
| 
 | 
 | ||||||
| 	rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_READER); | 	mutex_enter(DBUF_HASH_MUTEX(h, idx)); | ||||||
| 	for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) { | 	for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) { | ||||||
| 		if (DBUF_EQUAL(db, os, obj, level, blkid)) { | 		if (DBUF_EQUAL(db, os, obj, level, blkid)) { | ||||||
| 			mutex_enter(&db->db_mtx); | 			mutex_enter(&db->db_mtx); | ||||||
| 			if (db->db_state != DB_EVICTING) { | 			if (db->db_state != DB_EVICTING) { | ||||||
| 				rw_exit(DBUF_HASH_RWLOCK(h, idx)); | 				mutex_exit(DBUF_HASH_MUTEX(h, idx)); | ||||||
| 				return (db); | 				return (db); | ||||||
| 			} | 			} | ||||||
| 			mutex_exit(&db->db_mtx); | 			mutex_exit(&db->db_mtx); | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	rw_exit(DBUF_HASH_RWLOCK(h, idx)); | 	mutex_exit(DBUF_HASH_MUTEX(h, idx)); | ||||||
| 	return (NULL); | 	return (NULL); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -393,13 +393,13 @@ dbuf_hash_insert(dmu_buf_impl_t *db) | |||||||
| 	hv = dbuf_hash(os, obj, level, blkid); | 	hv = dbuf_hash(os, obj, level, blkid); | ||||||
| 	idx = hv & h->hash_table_mask; | 	idx = hv & h->hash_table_mask; | ||||||
| 
 | 
 | ||||||
| 	rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER); | 	mutex_enter(DBUF_HASH_MUTEX(h, idx)); | ||||||
| 	for (dbf = h->hash_table[idx], i = 0; dbf != NULL; | 	for (dbf = h->hash_table[idx], i = 0; dbf != NULL; | ||||||
| 	    dbf = dbf->db_hash_next, i++) { | 	    dbf = dbf->db_hash_next, i++) { | ||||||
| 		if (DBUF_EQUAL(dbf, os, obj, level, blkid)) { | 		if (DBUF_EQUAL(dbf, os, obj, level, blkid)) { | ||||||
| 			mutex_enter(&dbf->db_mtx); | 			mutex_enter(&dbf->db_mtx); | ||||||
| 			if (dbf->db_state != DB_EVICTING) { | 			if (dbf->db_state != DB_EVICTING) { | ||||||
| 				rw_exit(DBUF_HASH_RWLOCK(h, idx)); | 				mutex_exit(DBUF_HASH_MUTEX(h, idx)); | ||||||
| 				return (dbf); | 				return (dbf); | ||||||
| 			} | 			} | ||||||
| 			mutex_exit(&dbf->db_mtx); | 			mutex_exit(&dbf->db_mtx); | ||||||
| @ -417,7 +417,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db) | |||||||
| 	mutex_enter(&db->db_mtx); | 	mutex_enter(&db->db_mtx); | ||||||
| 	db->db_hash_next = h->hash_table[idx]; | 	db->db_hash_next = h->hash_table[idx]; | ||||||
| 	h->hash_table[idx] = db; | 	h->hash_table[idx] = db; | ||||||
| 	rw_exit(DBUF_HASH_RWLOCK(h, idx)); | 	mutex_exit(DBUF_HASH_MUTEX(h, idx)); | ||||||
| 	uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64); | 	uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64); | ||||||
| 	DBUF_STAT_MAX(hash_elements_max, he); | 	DBUF_STAT_MAX(hash_elements_max, he); | ||||||
| 
 | 
 | ||||||
| @ -474,13 +474,13 @@ dbuf_hash_remove(dmu_buf_impl_t *db) | |||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * We mustn't hold db_mtx to maintain lock ordering: | 	 * We mustn't hold db_mtx to maintain lock ordering: | ||||||
| 	 * DBUF_HASH_RWLOCK > db_mtx. | 	 * DBUF_HASH_MUTEX > db_mtx. | ||||||
| 	 */ | 	 */ | ||||||
| 	ASSERT(zfs_refcount_is_zero(&db->db_holds)); | 	ASSERT(zfs_refcount_is_zero(&db->db_holds)); | ||||||
| 	ASSERT(db->db_state == DB_EVICTING); | 	ASSERT(db->db_state == DB_EVICTING); | ||||||
| 	ASSERT(!MUTEX_HELD(&db->db_mtx)); | 	ASSERT(!MUTEX_HELD(&db->db_mtx)); | ||||||
| 
 | 
 | ||||||
| 	rw_enter(DBUF_HASH_RWLOCK(h, idx), RW_WRITER); | 	mutex_enter(DBUF_HASH_MUTEX(h, idx)); | ||||||
| 	dbp = &h->hash_table[idx]; | 	dbp = &h->hash_table[idx]; | ||||||
| 	while ((dbf = *dbp) != db) { | 	while ((dbf = *dbp) != db) { | ||||||
| 		dbp = &dbf->db_hash_next; | 		dbp = &dbf->db_hash_next; | ||||||
| @ -491,7 +491,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db) | |||||||
| 	if (h->hash_table[idx] && | 	if (h->hash_table[idx] && | ||||||
| 	    h->hash_table[idx]->db_hash_next == NULL) | 	    h->hash_table[idx]->db_hash_next == NULL) | ||||||
| 		DBUF_STAT_BUMPDOWN(hash_chains); | 		DBUF_STAT_BUMPDOWN(hash_chains); | ||||||
| 	rw_exit(DBUF_HASH_RWLOCK(h, idx)); | 	mutex_exit(DBUF_HASH_MUTEX(h, idx)); | ||||||
| 	atomic_dec_64(&dbuf_stats.hash_elements.value.ui64); | 	atomic_dec_64(&dbuf_stats.hash_elements.value.ui64); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -914,8 +914,8 @@ retry: | |||||||
| 	    sizeof (dmu_buf_impl_t), | 	    sizeof (dmu_buf_impl_t), | ||||||
| 	    0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); | 	    0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < DBUF_RWLOCKS; i++) | 	for (i = 0; i < DBUF_MUTEXES; i++) | ||||||
| 		rw_init(&h->hash_rwlocks[i], NULL, RW_DEFAULT, NULL); | 		mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); | ||||||
| 
 | 
 | ||||||
| 	dbuf_stats_init(h); | 	dbuf_stats_init(h); | ||||||
| 
 | 
 | ||||||
| @ -981,8 +981,8 @@ dbuf_fini(void) | |||||||
| 
 | 
 | ||||||
| 	dbuf_stats_destroy(); | 	dbuf_stats_destroy(); | ||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < DBUF_RWLOCKS; i++) | 	for (i = 0; i < DBUF_MUTEXES; i++) | ||||||
| 		rw_destroy(&h->hash_rwlocks[i]); | 		mutex_destroy(&h->hash_mutexes[i]); | ||||||
| #if defined(_KERNEL) | #if defined(_KERNEL) | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Large allocations which do not require contiguous pages | 	 * Large allocations which do not require contiguous pages | ||||||
|  | |||||||
| @ -137,7 +137,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data) | |||||||
| 	if (size) | 	if (size) | ||||||
| 		buf[0] = 0; | 		buf[0] = 0; | ||||||
| 
 | 
 | ||||||
| 	rw_enter(DBUF_HASH_RWLOCK(h, dsh->idx), RW_READER); | 	mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx)); | ||||||
| 	for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) { | 	for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) { | ||||||
| 		/*
 | 		/*
 | ||||||
| 		 * Returning ENOMEM will cause the data and header functions | 		 * Returning ENOMEM will cause the data and header functions | ||||||
| @ -158,7 +158,7 @@ dbuf_stats_hash_table_data(char *buf, size_t size, void *data) | |||||||
| 
 | 
 | ||||||
| 		mutex_exit(&db->db_mtx); | 		mutex_exit(&db->db_mtx); | ||||||
| 	} | 	} | ||||||
| 	rw_exit(DBUF_HASH_RWLOCK(h, dsh->idx)); | 	mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx)); | ||||||
| 
 | 
 | ||||||
| 	return (error); | 	return (error); | ||||||
| } | } | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Brian Behlendorf
						Brian Behlendorf