mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Introduce a tunable to exclude special class buffers from L2ARC
Special allocation class or dedup vdevs may have roughly the same performance as L2ARC vdevs. Introduce a new tunable to exclude those buffers from being cacheable on L2ARC. Reviewed-by: Don Brady <don.brady@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: George Amanakis <gamanakis@gmail.com> Closes #11761 Closes #12285
This commit is contained in:
		
							parent
							
								
									c8f795ba53
								
							
						
					
					
						commit
						8bd3dca9bf
					
				| @ -85,6 +85,7 @@ typedef void arc_prune_func_t(int64_t bytes, void *priv); | |||||||
| 
 | 
 | ||||||
| /* Shared module parameters */ | /* Shared module parameters */ | ||||||
| extern int zfs_arc_average_blocksize; | extern int zfs_arc_average_blocksize; | ||||||
|  | extern int l2arc_exclude_special; | ||||||
| 
 | 
 | ||||||
| /* generic arc_done_func_t's which you can use */ | /* generic arc_done_func_t's which you can use */ | ||||||
| arc_read_done_func_t arc_bcopy_func; | arc_read_done_func_t arc_bcopy_func; | ||||||
|  | |||||||
| @ -441,16 +441,7 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg) | |||||||
| 	(dbuf_is_metadata(_db) &&					\ | 	(dbuf_is_metadata(_db) &&					\ | ||||||
| 	((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) | 	((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA))) | ||||||
| 
 | 
 | ||||||
| #define	DBUF_IS_L2CACHEABLE(_db)					\ | boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db); | ||||||
| 	((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL ||	\ |  | ||||||
| 	(dbuf_is_metadata(_db) &&					\ |  | ||||||
| 	((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA))) |  | ||||||
| 
 |  | ||||||
| #define	DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level)				\ |  | ||||||
| 	((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL ||	\ |  | ||||||
| 	(((_level) > 0 ||						\ |  | ||||||
| 	DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) &&	\ |  | ||||||
| 	((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA))) |  | ||||||
| 
 | 
 | ||||||
| #ifdef ZFS_DEBUG | #ifdef ZFS_DEBUG | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -200,10 +200,6 @@ struct objset { | |||||||
| #define	DMU_GROUPUSED_DNODE(os)	((os)->os_groupused_dnode.dnh_dnode) | #define	DMU_GROUPUSED_DNODE(os)	((os)->os_groupused_dnode.dnh_dnode) | ||||||
| #define	DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode) | #define	DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode) | ||||||
| 
 | 
 | ||||||
| #define	DMU_OS_IS_L2CACHEABLE(os)				\ |  | ||||||
| 	((os)->os_secondary_cache == ZFS_CACHE_ALL ||		\ |  | ||||||
| 	(os)->os_secondary_cache == ZFS_CACHE_METADATA) |  | ||||||
| 
 |  | ||||||
| /* called from zpl */ | /* called from zpl */ | ||||||
| int dmu_objset_hold(const char *name, void *tag, objset_t **osp); | int dmu_objset_hold(const char *name, void *tag, objset_t **osp); | ||||||
| int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag, | int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag, | ||||||
|  | |||||||
| @ -109,6 +109,11 @@ A value of | |||||||
| .Sy 100 | .Sy 100 | ||||||
| disables this feature. | disables this feature. | ||||||
| . | . | ||||||
|  | .It Sy l2arc_exclude_special Ns = Ns Sy 0 Ns | Ns 1 Pq int | ||||||
|  | Controls whether buffers present on special vdevs are eligible for caching | ||||||
|  | into L2ARC. | ||||||
|  | If set to 1, exclude dbufs on special or dedup vdevs from being cached to L2ARC. | ||||||
|  | . | ||||||
| .It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq  int | .It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq  int | ||||||
| Controls whether only MFU metadata and data are cached from ARC into L2ARC. | Controls whether only MFU metadata and data are cached from ARC into L2ARC. | ||||||
| This may be desired to avoid wasting space on L2ARC when reading/writing large | This may be desired to avoid wasting space on L2ARC when reading/writing large | ||||||
|  | |||||||
| @ -877,6 +877,14 @@ static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr, | |||||||
| #define	l2arc_hdr_arcstats_decrement_state(hdr) \ | #define	l2arc_hdr_arcstats_decrement_state(hdr) \ | ||||||
| 	l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE) | 	l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE) | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * l2arc_exclude_special : A zfs module parameter that controls whether buffers | ||||||
|  |  * 		present on special vdevs are eligible for caching in L2ARC. If | ||||||
|  |  * 		set to 1, exclude dbufs on special vdevs from being cached to | ||||||
|  |  * 		L2ARC. | ||||||
|  |  */ | ||||||
|  | int l2arc_exclude_special = 0; | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * l2arc_mfuonly : A ZFS module parameter that controls whether only MFU |  * l2arc_mfuonly : A ZFS module parameter that controls whether only MFU | ||||||
|  * 		metadata and data are cached from ARC into L2ARC. |  * 		metadata and data are cached from ARC into L2ARC. | ||||||
| @ -11136,6 +11144,10 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW, | |||||||
| ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW, | ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW, | ||||||
| 	"Cache only MFU data from ARC into L2ARC"); | 	"Cache only MFU data from ARC into L2ARC"); | ||||||
| 
 | 
 | ||||||
|  | ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW, | ||||||
|  | 	"If set to 1 exclude dbufs on special vdevs from being cached to " | ||||||
|  | 	"L2ARC."); | ||||||
|  | 
 | ||||||
| ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int, | ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int, | ||||||
| 	param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes"); | 	param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes"); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -53,6 +53,7 @@ | |||||||
| #include <cityhash.h> | #include <cityhash.h> | ||||||
| #include <sys/spa_impl.h> | #include <sys/spa_impl.h> | ||||||
| #include <sys/wmsum.h> | #include <sys/wmsum.h> | ||||||
|  | #include <sys/vdev_impl.h> | ||||||
| 
 | 
 | ||||||
| kstat_t *dbuf_ksp; | kstat_t *dbuf_ksp; | ||||||
| 
 | 
 | ||||||
| @ -594,6 +595,68 @@ dbuf_is_metadata(dmu_buf_impl_t *db) | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * We want to exclude buffers that are on a special allocation class from | ||||||
|  |  * L2ARC. | ||||||
|  |  */ | ||||||
|  | boolean_t | ||||||
|  | dbuf_is_l2cacheable(dmu_buf_impl_t *db) | ||||||
|  | { | ||||||
|  | 	vdev_t *vd = NULL; | ||||||
|  | 	zfs_cache_type_t cache = db->db_objset->os_secondary_cache; | ||||||
|  | 	blkptr_t *bp = db->db_blkptr; | ||||||
|  | 
 | ||||||
|  | 	if (bp != NULL && !BP_IS_HOLE(bp)) { | ||||||
|  | 		uint64_t vdev = DVA_GET_VDEV(bp->blk_dva); | ||||||
|  | 		vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev; | ||||||
|  | 
 | ||||||
|  | 		if (vdev < rvd->vdev_children) | ||||||
|  | 			vd = rvd->vdev_child[vdev]; | ||||||
|  | 
 | ||||||
|  | 		if (cache == ZFS_CACHE_ALL || | ||||||
|  | 		    (dbuf_is_metadata(db) && cache == ZFS_CACHE_METADATA)) { | ||||||
|  | 			if (vd == NULL) | ||||||
|  | 				return (B_TRUE); | ||||||
|  | 
 | ||||||
|  | 			if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL && | ||||||
|  | 			    vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) || | ||||||
|  | 			    l2arc_exclude_special == 0) | ||||||
|  | 				return (B_TRUE); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return (B_FALSE); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline boolean_t | ||||||
|  | dnode_level_is_l2cacheable(blkptr_t *bp, dnode_t *dn, int64_t level) | ||||||
|  | { | ||||||
|  | 	vdev_t *vd = NULL; | ||||||
|  | 	zfs_cache_type_t cache = dn->dn_objset->os_secondary_cache; | ||||||
|  | 
 | ||||||
|  | 	if (bp != NULL && !BP_IS_HOLE(bp)) { | ||||||
|  | 		uint64_t vdev = DVA_GET_VDEV(bp->blk_dva); | ||||||
|  | 		vdev_t *rvd = dn->dn_objset->os_spa->spa_root_vdev; | ||||||
|  | 
 | ||||||
|  | 		if (vdev < rvd->vdev_children) | ||||||
|  | 			vd = rvd->vdev_child[vdev]; | ||||||
|  | 
 | ||||||
|  | 		if (cache == ZFS_CACHE_ALL || ((level > 0 || | ||||||
|  | 		    DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type)) && | ||||||
|  | 		    cache == ZFS_CACHE_METADATA)) { | ||||||
|  | 			if (vd == NULL) | ||||||
|  | 				return (B_TRUE); | ||||||
|  | 
 | ||||||
|  | 			if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL && | ||||||
|  | 			    vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) || | ||||||
|  | 			    l2arc_exclude_special == 0) | ||||||
|  | 				return (B_TRUE); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return (B_FALSE); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * This function *must* return indices evenly distributed between all |  * This function *must* return indices evenly distributed between all | ||||||
| @ -1523,7 +1586,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags, | |||||||
| 	DTRACE_SET_STATE(db, "read issued"); | 	DTRACE_SET_STATE(db, "read issued"); | ||||||
| 	mutex_exit(&db->db_mtx); | 	mutex_exit(&db->db_mtx); | ||||||
| 
 | 
 | ||||||
| 	if (DBUF_IS_L2CACHEABLE(db)) | 	if (dbuf_is_l2cacheable(db)) | ||||||
| 		aflags |= ARC_FLAG_L2CACHE; | 		aflags |= ARC_FLAG_L2CACHE; | ||||||
| 
 | 
 | ||||||
| 	dbuf_add_ref(db, NULL); | 	dbuf_add_ref(db, NULL); | ||||||
| @ -3372,7 +3435,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid, | |||||||
| 	dpa->dpa_arg = arg; | 	dpa->dpa_arg = arg; | ||||||
| 
 | 
 | ||||||
| 	/* flag if L2ARC eligible, l2arc_noprefetch then decides */ | 	/* flag if L2ARC eligible, l2arc_noprefetch then decides */ | ||||||
| 	if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level)) | 	if (dnode_level_is_l2cacheable(&bp, dn, level)) | ||||||
| 		dpa->dpa_aflags |= ARC_FLAG_L2CACHE; | 		dpa->dpa_aflags |= ARC_FLAG_L2CACHE; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| @ -3390,7 +3453,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid, | |||||||
| 		zbookmark_phys_t zb; | 		zbookmark_phys_t zb; | ||||||
| 
 | 
 | ||||||
| 		/* flag if L2ARC eligible, l2arc_noprefetch then decides */ | 		/* flag if L2ARC eligible, l2arc_noprefetch then decides */ | ||||||
| 		if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level)) | 		if (dnode_level_is_l2cacheable(&bp, dn, level)) | ||||||
| 			iter_aflags |= ARC_FLAG_L2CACHE; | 			iter_aflags |= ARC_FLAG_L2CACHE; | ||||||
| 
 | 
 | ||||||
| 		SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET, | 		SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET, | ||||||
| @ -4989,7 +5052,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) | |||||||
| 			children_ready_cb = dbuf_write_children_ready; | 			children_ready_cb = dbuf_write_children_ready; | ||||||
| 
 | 
 | ||||||
| 		dr->dr_zio = arc_write(pio, os->os_spa, txg, | 		dr->dr_zio = arc_write(pio, os->os_spa, txg, | ||||||
| 		    &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db), | 		    &dr->dr_bp_copy, data, dbuf_is_l2cacheable(db), | ||||||
| 		    &zp, dbuf_write_ready, | 		    &zp, dbuf_write_ready, | ||||||
| 		    children_ready_cb, dbuf_write_physdone, | 		    children_ready_cb, dbuf_write_physdone, | ||||||
| 		    dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE, | 		    dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE, | ||||||
|  | |||||||
| @ -1846,7 +1846,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) | |||||||
| 	dsa->dsa_tx = NULL; | 	dsa->dsa_tx = NULL; | ||||||
| 
 | 
 | ||||||
| 	zio_nowait(arc_write(pio, os->os_spa, txg, | 	zio_nowait(arc_write(pio, os->os_spa, txg, | ||||||
| 	    zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), | 	    zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db), | ||||||
| 	    &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa, | 	    &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa, | ||||||
| 	    ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); | 	    ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -63,6 +63,8 @@ | |||||||
| #include <sys/dmu_recv.h> | #include <sys/dmu_recv.h> | ||||||
| #include <sys/zfs_project.h> | #include <sys/zfs_project.h> | ||||||
| #include "zfs_namecheck.h" | #include "zfs_namecheck.h" | ||||||
|  | #include <sys/vdev_impl.h> | ||||||
|  | #include <sys/arc.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Needed to close a window in dnode_move() that allows the objset to be freed |  * Needed to close a window in dnode_move() that allows the objset to be freed | ||||||
| @ -411,6 +413,34 @@ dnode_multilist_index_func(multilist_t *ml, void *obj) | |||||||
| 	    multilist_get_num_sublists(ml)); | 	    multilist_get_num_sublists(ml)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static inline boolean_t | ||||||
|  | dmu_os_is_l2cacheable(objset_t *os) | ||||||
|  | { | ||||||
|  | 	vdev_t *vd = NULL; | ||||||
|  | 	zfs_cache_type_t cache = os->os_secondary_cache; | ||||||
|  | 	blkptr_t *bp = os->os_rootbp; | ||||||
|  | 
 | ||||||
|  | 	if (bp != NULL && !BP_IS_HOLE(bp)) { | ||||||
|  | 		uint64_t vdev = DVA_GET_VDEV(bp->blk_dva); | ||||||
|  | 		vdev_t *rvd = os->os_spa->spa_root_vdev; | ||||||
|  | 
 | ||||||
|  | 		if (vdev < rvd->vdev_children) | ||||||
|  | 			vd = rvd->vdev_child[vdev]; | ||||||
|  | 
 | ||||||
|  | 		if (cache == ZFS_CACHE_ALL || cache == ZFS_CACHE_METADATA) { | ||||||
|  | 			if (vd == NULL) | ||||||
|  | 				return (B_TRUE); | ||||||
|  | 
 | ||||||
|  | 			if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL && | ||||||
|  | 			    vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) || | ||||||
|  | 			    l2arc_exclude_special == 0) | ||||||
|  | 				return (B_TRUE); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return (B_FALSE); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Instantiates the objset_t in-memory structure corresponding to the |  * Instantiates the objset_t in-memory structure corresponding to the | ||||||
|  * objset_phys_t that's pointed to by the specified blkptr_t. |  * objset_phys_t that's pointed to by the specified blkptr_t. | ||||||
| @ -453,7 +483,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, | |||||||
| 		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, | 		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, | ||||||
| 		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); | 		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); | ||||||
| 
 | 
 | ||||||
| 		if (DMU_OS_IS_L2CACHEABLE(os)) | 		if (dmu_os_is_l2cacheable(os)) | ||||||
| 			aflags |= ARC_FLAG_L2CACHE; | 			aflags |= ARC_FLAG_L2CACHE; | ||||||
| 
 | 
 | ||||||
| 		if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) { | 		if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) { | ||||||
| @ -1661,7 +1691,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) | |||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	zio = arc_write(pio, os->os_spa, tx->tx_txg, | 	zio = arc_write(pio, os->os_spa, tx->tx_txg, | ||||||
| 	    blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), | 	    blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os), | ||||||
| 	    &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, | 	    &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done, | ||||||
| 	    os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); | 	    os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 George Amanakis
						George Amanakis