mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
More adaptive ARC eviction
Traditionally ARC adaptation was limited to the MRU/MFU distribution. But for years people with metadata-centric workloads demanded mechanisms to also manage the data/metadata distribution, which in original ZFS was just a FIFO. As a result ZFS effectively got separate states for data and metadata, minimum and maximum metadata limits, etc., but it all required manual tuning, was not adaptive, and at its heart remained a bad FIFO. This change removes most of the existing eviction logic, rewriting it from scratch. This makes MRU/MFU adaptation individual for data and metadata, same as the distribution between data and metadata themselves. Since most of the required state separation was already done, it only required making the arcs_size state field specific per data/metadata. The adaptation logic is still based on the previous concept of ghost hits, just now it balances ARC capacity between 4 states: MRU data, MRU metadata, MFU data and MFU metadata. To simplify arc_c changes, instead of arc_p measured in bytes, this code uses 3 variables, arc_meta, arc_pd and arc_pm, representing the ARC balance between metadata and data, MRU and MFU for data, and MRU and MFU for metadata respectively, as 32-bit fixed point fractions. Since we care about the math result only when we need to evict, this moves all the logic from arc_adapt() to arc_evict(), which reduces per-block overhead, since per-block operations are limited to stats collection, now moved from arc_adapt() to arc_access() and using cheaper wmsums. This also allows removing the ugly ARC_HDR_DO_ADAPT flag from many places. This change also removes a number of metadata-specific tunables, some of which were actually not functioning correctly, since not all metadata are equal and some (like L2ARC headers) are not really evictable. Instead it introduces a single opaque knob, zfs_arc_meta_balance, tuning ARC's reaction to ghost hits, allowing the administrator to give more or less preference to metadata without setting strict limits.
Some old code parts like arc_evict_meta() are just removed, because since the introduction of ABD ARC they really make no sense: only headers referenced by a small number of buffers are not evictable, and they are really not evictable no matter what this code does. Instead just call arc_prune_async() if too much metadata appears not evictable. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #14359
This commit is contained in:
@@ -159,7 +159,7 @@ arc_prune_task(void *arg)
|
||||
/*
|
||||
* Notify registered consumers they must drop holds on a portion of the ARC
|
||||
* buffers they reference. This provides a mechanism to ensure the ARC can
|
||||
* honor the arc_meta_limit and reclaim otherwise pinned ARC buffers. This
|
||||
* honor the metadata limit and reclaim otherwise pinned ARC buffers. This
|
||||
* is analogous to dnlc_reduce_cache() but more generic.
|
||||
*
|
||||
* This operation is performed asynchronously so it may be safely called
|
||||
|
||||
@@ -359,89 +359,114 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw,
|
||||
"No reads during writes (LEGACY)");
|
||||
/* END CSTYLED */
|
||||
|
||||
/*
 * Sysctl handler that reports the total size of one ARC state.
 *
 * arg1 is interpreted as a pointer to the arc_state_t to report on.  The
 * value handed to sysctl is the sum of that state's data and metadata
 * arcs_size reference counts; the handler returns whatever
 * sysctl_handle_64() returns.
 */
static int
|
||||
param_get_arc_state_size(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
arc_state_t *state = (arc_state_t *)arg1;
|
||||
int64_t val;
|
||||
|
||||
/* arcs_size is indexed by buffer type, so add both entries for a total. */
val = zfs_refcount_count(&state->arcs_size[ARC_BUFC_DATA]) +
|
||||
zfs_refcount_count(&state->arcs_size[ARC_BUFC_METADATA]);
|
||||
return (sysctl_handle_64(oidp, &val, 0, req));
|
||||
}
|
||||
|
||||
extern arc_state_t ARC_anon;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
|
||||
&ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, anon_size,
|
||||
CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&ARC_anon, 0, param_get_arc_state_size, "Q",
|
||||
"size of anonymous state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in anonymous state");
|
||||
"size of evictable metadata in anonymous state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
|
||||
&ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in anonymous state");
|
||||
"size of evictable data in anonymous state");
|
||||
/* END CSTYLED */
|
||||
|
||||
extern arc_state_t ARC_mru;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,
|
||||
&ARC_mru.arcs_size.rc_count, 0, "size of mru state");
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_size,
|
||||
CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&ARC_mru, 0, param_get_arc_state_size, "Q",
|
||||
"size of mru state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in mru state");
|
||||
"size of evictable metadata in mru state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
|
||||
&ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in mru state");
|
||||
"size of evictable data in mru state");
|
||||
/* END CSTYLED */
|
||||
|
||||
extern arc_state_t ARC_mru_ghost;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,
|
||||
&ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state");
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_ghost_size,
|
||||
CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&ARC_mru_ghost, 0, param_get_arc_state_size, "Q",
|
||||
"size of mru ghost state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in mru ghost state");
|
||||
"size of evictable metadata in mru ghost state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
|
||||
&ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in mru ghost state");
|
||||
"size of evictable data in mru ghost state");
|
||||
/* END CSTYLED */
|
||||
|
||||
extern arc_state_t ARC_mfu;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,
|
||||
&ARC_mfu.arcs_size.rc_count, 0, "size of mfu state");
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_size,
|
||||
CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&ARC_mfu, 0, param_get_arc_state_size, "Q",
|
||||
"size of mfu state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in mfu state");
|
||||
"size of evictable metadata in mfu state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
|
||||
&ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in mfu state");
|
||||
"size of evictable data in mfu state");
|
||||
/* END CSTYLED */
|
||||
|
||||
extern arc_state_t ARC_mfu_ghost;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,
|
||||
&ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state");
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_ghost_size,
|
||||
CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&ARC_mfu_ghost, 0, param_get_arc_state_size, "Q",
|
||||
"size of mfu ghost state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in mfu ghost state");
|
||||
"size of evictable metadata in mfu ghost state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
|
||||
&ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in mfu ghost state");
|
||||
"size of evictable data in mfu ghost state");
|
||||
/* END CSTYLED */
|
||||
|
||||
extern arc_state_t ARC_uncached;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_size, CTLFLAG_RD,
|
||||
&ARC_uncached.arcs_size.rc_count, 0, "size of uncached state");
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, uncached_size,
|
||||
CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&ARC_uncached, 0, param_get_arc_state_size, "Q",
|
||||
"size of uncached state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD,
|
||||
&ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
|
||||
"size of metadata in uncached state");
|
||||
"size of evictable metadata in uncached state");
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD,
|
||||
&ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
|
||||
"size of data in uncached state");
|
||||
"size of evictable data in uncached state");
|
||||
/* END CSTYLED */
|
||||
|
||||
extern arc_state_t ARC_l2c_only;
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
|
||||
&ARC_l2c_only.arcs_size.rc_count, 0, "size of mru state");
|
||||
SYSCTL_PROC(_vfs_zfs, OID_AUTO, l2c_only_size,
|
||||
CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
|
||||
&ARC_l2c_only, 0, param_get_arc_state_size, "Q",
|
||||
"size of l2c_only state");
|
||||
/* END CSTYLED */
|
||||
|
||||
/* dbuf.c */
|
||||
|
||||
@@ -504,7 +504,7 @@ arc_prune_task(void *ptr)
|
||||
/*
|
||||
* Notify registered consumers they must drop holds on a portion of the ARC
|
||||
* buffers they reference. This provides a mechanism to ensure the ARC can
|
||||
* honor the arc_meta_limit and reclaim otherwise pinned ARC buffers. This
|
||||
* honor the metadata limit and reclaim otherwise pinned ARC buffers. This
|
||||
* is analogous to dnlc_reduce_cache() but more generic.
|
||||
*
|
||||
* This operation is performed asynchronously so it may be safely called
|
||||
|
||||
+314
-627
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user