mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-28 19:04:23 +03:00
Introduce a tunable to exclude special class buffers from L2ARC
Special allocation class or dedup vdevs may have roughly the same performance as L2ARC vdevs. Introduce a new tunable to exclude those buffers from being cacheable on L2ARC. Reviewed-by: Don Brady <don.brady@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: George Amanakis <gamanakis@gmail.com> Closes #11761 Closes #12285
This commit is contained in:
parent
c8f795ba53
commit
8bd3dca9bf
@ -85,6 +85,7 @@ typedef void arc_prune_func_t(int64_t bytes, void *priv);
|
|||||||
|
|
||||||
/* Shared module parameters */
|
/* Shared module parameters */
|
||||||
extern int zfs_arc_average_blocksize;
|
extern int zfs_arc_average_blocksize;
|
||||||
|
extern int l2arc_exclude_special;
|
||||||
|
|
||||||
/* generic arc_done_func_t's which you can use */
|
/* generic arc_done_func_t's which you can use */
|
||||||
arc_read_done_func_t arc_bcopy_func;
|
arc_read_done_func_t arc_bcopy_func;
|
||||||
|
@ -441,16 +441,7 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
|
|||||||
(dbuf_is_metadata(_db) && \
|
(dbuf_is_metadata(_db) && \
|
||||||
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
|
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
|
||||||
|
|
||||||
#define DBUF_IS_L2CACHEABLE(_db) \
|
boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db);
|
||||||
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \
|
|
||||||
(dbuf_is_metadata(_db) && \
|
|
||||||
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
|
|
||||||
|
|
||||||
#define DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level) \
|
|
||||||
((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL || \
|
|
||||||
(((_level) > 0 || \
|
|
||||||
DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) && \
|
|
||||||
((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
|
|
||||||
|
|
||||||
#ifdef ZFS_DEBUG
|
#ifdef ZFS_DEBUG
|
||||||
|
|
||||||
|
@ -200,10 +200,6 @@ struct objset {
|
|||||||
#define DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode)
|
#define DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode)
|
||||||
#define DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode)
|
#define DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode)
|
||||||
|
|
||||||
#define DMU_OS_IS_L2CACHEABLE(os) \
|
|
||||||
((os)->os_secondary_cache == ZFS_CACHE_ALL || \
|
|
||||||
(os)->os_secondary_cache == ZFS_CACHE_METADATA)
|
|
||||||
|
|
||||||
/* called from zpl */
|
/* called from zpl */
|
||||||
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
|
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
|
||||||
int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
|
int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
|
||||||
|
@ -109,6 +109,11 @@ A value of
|
|||||||
.Sy 100
|
.Sy 100
|
||||||
disables this feature.
|
disables this feature.
|
||||||
.
|
.
|
||||||
|
.It Sy l2arc_exclude_special Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
||||||
|
Controls whether buffers present on special vdevs are eligible for caching
|
||||||
|
into L2ARC.
|
||||||
|
If set to 1, exclude dbufs on special vdevs from being cached to L2ARC.
|
||||||
|
.
|
||||||
.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
.It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
||||||
Controls whether only MFU metadata and data are cached from ARC into L2ARC.
|
Controls whether only MFU metadata and data are cached from ARC into L2ARC.
|
||||||
This may be desired to avoid wasting space on L2ARC when reading/writing large
|
This may be desired to avoid wasting space on L2ARC when reading/writing large
|
||||||
|
@ -877,6 +877,14 @@ static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
|
|||||||
#define l2arc_hdr_arcstats_decrement_state(hdr) \
|
#define l2arc_hdr_arcstats_decrement_state(hdr) \
|
||||||
l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
|
l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* l2arc_exclude_special : A zfs module parameter that controls whether buffers
|
||||||
|
* present on special vdevs are eligible for caching in L2ARC. If
|
||||||
|
* set to 1, exclude dbufs on special vdevs from being cached to
|
||||||
|
* L2ARC.
|
||||||
|
*/
|
||||||
|
int l2arc_exclude_special = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
|
* l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
|
||||||
* metadata and data are cached from ARC into L2ARC.
|
* metadata and data are cached from ARC into L2ARC.
|
||||||
@ -11136,6 +11144,10 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
|
|||||||
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
|
||||||
"Cache only MFU data from ARC into L2ARC");
|
"Cache only MFU data from ARC into L2ARC");
|
||||||
|
|
||||||
|
ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
|
||||||
|
"If set to 1 exclude dbufs on special vdevs from being cached to "
|
||||||
|
"L2ARC.");
|
||||||
|
|
||||||
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
|
ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
|
||||||
param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");
|
param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");
|
||||||
|
|
||||||
|
@ -53,6 +53,7 @@
|
|||||||
#include <cityhash.h>
|
#include <cityhash.h>
|
||||||
#include <sys/spa_impl.h>
|
#include <sys/spa_impl.h>
|
||||||
#include <sys/wmsum.h>
|
#include <sys/wmsum.h>
|
||||||
|
#include <sys/vdev_impl.h>
|
||||||
|
|
||||||
kstat_t *dbuf_ksp;
|
kstat_t *dbuf_ksp;
|
||||||
|
|
||||||
@ -594,6 +595,68 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We want to exclude buffers that are on a special allocation class from
|
||||||
|
* L2ARC.
|
||||||
|
*/
|
||||||
|
boolean_t
|
||||||
|
dbuf_is_l2cacheable(dmu_buf_impl_t *db)
|
||||||
|
{
|
||||||
|
vdev_t *vd = NULL;
|
||||||
|
zfs_cache_type_t cache = db->db_objset->os_secondary_cache;
|
||||||
|
blkptr_t *bp = db->db_blkptr;
|
||||||
|
|
||||||
|
if (bp != NULL && !BP_IS_HOLE(bp)) {
|
||||||
|
uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
|
||||||
|
vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev;
|
||||||
|
|
||||||
|
if (vdev < rvd->vdev_children)
|
||||||
|
vd = rvd->vdev_child[vdev];
|
||||||
|
|
||||||
|
if (cache == ZFS_CACHE_ALL ||
|
||||||
|
(dbuf_is_metadata(db) && cache == ZFS_CACHE_METADATA)) {
|
||||||
|
if (vd == NULL)
|
||||||
|
return (B_TRUE);
|
||||||
|
|
||||||
|
if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
|
||||||
|
vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
|
||||||
|
l2arc_exclude_special == 0)
|
||||||
|
return (B_TRUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline boolean_t
|
||||||
|
dnode_level_is_l2cacheable(blkptr_t *bp, dnode_t *dn, int64_t level)
|
||||||
|
{
|
||||||
|
vdev_t *vd = NULL;
|
||||||
|
zfs_cache_type_t cache = dn->dn_objset->os_secondary_cache;
|
||||||
|
|
||||||
|
if (bp != NULL && !BP_IS_HOLE(bp)) {
|
||||||
|
uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
|
||||||
|
vdev_t *rvd = dn->dn_objset->os_spa->spa_root_vdev;
|
||||||
|
|
||||||
|
if (vdev < rvd->vdev_children)
|
||||||
|
vd = rvd->vdev_child[vdev];
|
||||||
|
|
||||||
|
if (cache == ZFS_CACHE_ALL || ((level > 0 ||
|
||||||
|
DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type)) &&
|
||||||
|
cache == ZFS_CACHE_METADATA)) {
|
||||||
|
if (vd == NULL)
|
||||||
|
return (B_TRUE);
|
||||||
|
|
||||||
|
if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
|
||||||
|
vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
|
||||||
|
l2arc_exclude_special == 0)
|
||||||
|
return (B_TRUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function *must* return indices evenly distributed between all
|
* This function *must* return indices evenly distributed between all
|
||||||
@ -1523,7 +1586,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
|
|||||||
DTRACE_SET_STATE(db, "read issued");
|
DTRACE_SET_STATE(db, "read issued");
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
if (DBUF_IS_L2CACHEABLE(db))
|
if (dbuf_is_l2cacheable(db))
|
||||||
aflags |= ARC_FLAG_L2CACHE;
|
aflags |= ARC_FLAG_L2CACHE;
|
||||||
|
|
||||||
dbuf_add_ref(db, NULL);
|
dbuf_add_ref(db, NULL);
|
||||||
@ -3372,7 +3435,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
|
|||||||
dpa->dpa_arg = arg;
|
dpa->dpa_arg = arg;
|
||||||
|
|
||||||
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
|
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
|
||||||
if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
|
if (dnode_level_is_l2cacheable(&bp, dn, level))
|
||||||
dpa->dpa_aflags |= ARC_FLAG_L2CACHE;
|
dpa->dpa_aflags |= ARC_FLAG_L2CACHE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3390,7 +3453,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
|
|||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
|
|
||||||
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
|
/* flag if L2ARC eligible, l2arc_noprefetch then decides */
|
||||||
if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
|
if (dnode_level_is_l2cacheable(&bp, dn, level))
|
||||||
iter_aflags |= ARC_FLAG_L2CACHE;
|
iter_aflags |= ARC_FLAG_L2CACHE;
|
||||||
|
|
||||||
SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
|
SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
|
||||||
@ -4989,7 +5052,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
|||||||
children_ready_cb = dbuf_write_children_ready;
|
children_ready_cb = dbuf_write_children_ready;
|
||||||
|
|
||||||
dr->dr_zio = arc_write(pio, os->os_spa, txg,
|
dr->dr_zio = arc_write(pio, os->os_spa, txg,
|
||||||
&dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
|
&dr->dr_bp_copy, data, dbuf_is_l2cacheable(db),
|
||||||
&zp, dbuf_write_ready,
|
&zp, dbuf_write_ready,
|
||||||
children_ready_cb, dbuf_write_physdone,
|
children_ready_cb, dbuf_write_physdone,
|
||||||
dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
|
dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
|
||||||
|
@ -1846,7 +1846,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
|
|||||||
dsa->dsa_tx = NULL;
|
dsa->dsa_tx = NULL;
|
||||||
|
|
||||||
zio_nowait(arc_write(pio, os->os_spa, txg,
|
zio_nowait(arc_write(pio, os->os_spa, txg,
|
||||||
zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
|
zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db),
|
||||||
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
|
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
|
||||||
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
|
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
|
||||||
|
|
||||||
|
@ -63,6 +63,8 @@
|
|||||||
#include <sys/dmu_recv.h>
|
#include <sys/dmu_recv.h>
|
||||||
#include <sys/zfs_project.h>
|
#include <sys/zfs_project.h>
|
||||||
#include "zfs_namecheck.h"
|
#include "zfs_namecheck.h"
|
||||||
|
#include <sys/vdev_impl.h>
|
||||||
|
#include <sys/arc.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Needed to close a window in dnode_move() that allows the objset to be freed
|
* Needed to close a window in dnode_move() that allows the objset to be freed
|
||||||
@ -411,6 +413,34 @@ dnode_multilist_index_func(multilist_t *ml, void *obj)
|
|||||||
multilist_get_num_sublists(ml));
|
multilist_get_num_sublists(ml));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline boolean_t
|
||||||
|
dmu_os_is_l2cacheable(objset_t *os)
|
||||||
|
{
|
||||||
|
vdev_t *vd = NULL;
|
||||||
|
zfs_cache_type_t cache = os->os_secondary_cache;
|
||||||
|
blkptr_t *bp = os->os_rootbp;
|
||||||
|
|
||||||
|
if (bp != NULL && !BP_IS_HOLE(bp)) {
|
||||||
|
uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
|
||||||
|
vdev_t *rvd = os->os_spa->spa_root_vdev;
|
||||||
|
|
||||||
|
if (vdev < rvd->vdev_children)
|
||||||
|
vd = rvd->vdev_child[vdev];
|
||||||
|
|
||||||
|
if (cache == ZFS_CACHE_ALL || cache == ZFS_CACHE_METADATA) {
|
||||||
|
if (vd == NULL)
|
||||||
|
return (B_TRUE);
|
||||||
|
|
||||||
|
if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
|
||||||
|
vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
|
||||||
|
l2arc_exclude_special == 0)
|
||||||
|
return (B_TRUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Instantiates the objset_t in-memory structure corresponding to the
|
* Instantiates the objset_t in-memory structure corresponding to the
|
||||||
* objset_phys_t that's pointed to by the specified blkptr_t.
|
* objset_phys_t that's pointed to by the specified blkptr_t.
|
||||||
@ -453,7 +483,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
|||||||
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
|
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
|
||||||
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
||||||
|
|
||||||
if (DMU_OS_IS_L2CACHEABLE(os))
|
if (dmu_os_is_l2cacheable(os))
|
||||||
aflags |= ARC_FLAG_L2CACHE;
|
aflags |= ARC_FLAG_L2CACHE;
|
||||||
|
|
||||||
if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
|
if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
|
||||||
@ -1661,7 +1691,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
zio = arc_write(pio, os->os_spa, tx->tx_txg,
|
zio = arc_write(pio, os->os_spa, tx->tx_txg,
|
||||||
blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
|
blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os),
|
||||||
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
|
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
|
||||||
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
|
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user